databricks-sql-connector 3.3.0__tar.gz → 3.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/CHANGELOG.md +11 -0
  2. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/PKG-INFO +8 -9
  3. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/README.md +5 -7
  4. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/pyproject.toml +2 -2
  5. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/__init__.py +1 -1
  6. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/auth.py +14 -1
  7. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/thrift_http_client.py +25 -16
  8. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/client.py +110 -12
  9. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/cloudfetch/download_manager.py +5 -4
  10. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/cloudfetch/downloader.py +5 -7
  11. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_backend.py +22 -45
  12. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/types.py +48 -0
  13. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/utils.py +168 -26
  14. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/LICENSE +0 -0
  15. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/__init__.py +0 -0
  16. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/__init__.py +0 -0
  17. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/authenticators.py +0 -0
  18. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/endpoint.py +0 -0
  19. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/oauth.py +0 -0
  20. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  21. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/retry.py +0 -0
  22. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/exc.py +0 -0
  23. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/experimental/__init__.py +0 -0
  24. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  25. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/__init__.py +0 -0
  26. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/native.py +0 -0
  27. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/py.typed +0 -0
  28. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/py.typed +0 -0
  29. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  30. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  31. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  32. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  33. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  34. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/__init__.py +0 -0
  35. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/README.sqlalchemy.md +0 -0
  36. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/README.tests.md +0 -0
  37. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/__init__.py +0 -0
  38. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_ddl.py +0 -0
  39. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_parse.py +0 -0
  40. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_types.py +0 -0
  41. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/base.py +0 -0
  42. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/py.typed +0 -0
  43. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/requirements.py +0 -0
  44. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/setup.cfg +0 -0
  45. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_extra.py +0 -0
  46. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_future.py +0 -0
  47. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_regression.py +0 -0
  48. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_unsupported.py +0 -0
  49. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/conftest.py +0 -0
  50. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +0 -0
  51. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/overrides/_ctetest.py +0 -0
  52. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/test_suite.py +0 -0
  53. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/__init__.py +0 -0
  54. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/conftest.py +0 -0
  55. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  56. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/e2e/test_basic.py +0 -0
  57. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_ddl.py +0 -0
  58. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_parsing.py +0 -0
  59. {databricks_sql_connector-3.3.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_types.py +0 -0
@@ -1,5 +1,16 @@
1
1
  # Release History
2
2
 
3
+ # 3.5.0 (2024-10-18)
4
+
5
+ - Create a non-pyarrow flow to handle small results for the column set (databricks/databricks-sql-python#440 by @jprakash-db)
6
+ - Fix: On non-retryable error, ensure PySQL includes useful information in error (databricks/databricks-sql-python#447 by @shivam2680)
7
+
8
+ # 3.4.0 (2024-08-27)
9
+
10
+ - Unpin pandas to support v2.2.2 (databricks/databricks-sql-python#416 by @kfollesdal)
11
+ - Make OAuth the default authenticator if no authentication setting is provided (databricks/databricks-sql-python#419 by @jackyhu-db)
12
+ - Fix (regression): use SSL options with HTTPS connection pool (databricks/databricks-sql-python#425 by @kravets-levko)
13
+
3
14
  # 3.3.0 (2024-07-18)
4
15
 
5
16
  - Don't retry requests that fail with HTTP code 401 (databricks/databricks-sql-python#408 by @Hodnebo)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: databricks-sql-connector
3
- Version: 3.3.0
3
+ Version: 3.5.0
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  Author: Databricks
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
16
17
  Provides-Extra: alembic
17
18
  Provides-Extra: sqlalchemy
18
19
  Requires-Dist: alembic (>=1.0.11,<2.0.0) ; extra == "alembic"
@@ -21,7 +22,7 @@ Requires-Dist: numpy (>=1.16.6,<2.0.0) ; python_version >= "3.8" and python_vers
21
22
  Requires-Dist: numpy (>=1.23.4,<2.0.0) ; python_version >= "3.11"
22
23
  Requires-Dist: oauthlib (>=3.1.0,<4.0.0)
23
24
  Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
24
- Requires-Dist: pandas (>=1.2.5,<2.2.0) ; python_version >= "3.8"
25
+ Requires-Dist: pandas (>=1.2.5,<2.3.0) ; python_version >= "3.8"
25
26
  Requires-Dist: pyarrow (>=14.0.1,<17)
26
27
  Requires-Dist: requests (>=2.18.1,<3.0.0)
27
28
  Requires-Dist: sqlalchemy (>=2.0.21) ; extra == "sqlalchemy" or extra == "alembic"
@@ -57,12 +58,9 @@ For the latest documentation, see
57
58
 
58
59
  Install the library with `pip install databricks-sql-connector`
59
60
 
60
- Note: Don't hard-code authentication secrets into your Python. Use environment variables
61
-
62
61
  ```bash
63
62
  export DATABRICKS_HOST=********.databricks.com
64
63
  export DATABRICKS_HTTP_PATH=/sql/1.0/endpoints/****************
65
- export DATABRICKS_TOKEN=dapi********************************
66
64
  ```
67
65
 
68
66
  Example usage:
@@ -72,12 +70,10 @@ from databricks import sql
72
70
 
73
71
  host = os.getenv("DATABRICKS_HOST")
74
72
  http_path = os.getenv("DATABRICKS_HTTP_PATH")
75
- access_token = os.getenv("DATABRICKS_TOKEN")
76
73
 
77
74
  connection = sql.connect(
78
75
  server_hostname=host,
79
- http_path=http_path,
80
- access_token=access_token)
76
+ http_path=http_path)
81
77
 
82
78
  cursor = connection.cursor()
83
79
  cursor.execute('SELECT :param `p`, * FROM RANGE(10)', {"param": "foo"})
@@ -93,7 +89,10 @@ In the above example:
93
89
  - `server-hostname` is the Databricks instance host name.
94
90
  - `http-path` is the HTTP Path either to a Databricks SQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef),
95
91
  or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
96
- - `personal-access-token` is the Databricks Personal Access Token for the account that will execute commands and queries
92
+
93
+ > Note: This example uses [Databricks OAuth U2M](https://docs.databricks.com/en/dev-tools/auth/oauth-u2m.html)
94
+ > to authenticate the target Databricks user account and needs to open the browser for authentication. So it
95
+ > can only run on the user's machine.
97
96
 
98
97
 
99
98
  ## Contributing
@@ -24,12 +24,9 @@ For the latest documentation, see
24
24
 
25
25
  Install the library with `pip install databricks-sql-connector`
26
26
 
27
- Note: Don't hard-code authentication secrets into your Python. Use environment variables
28
-
29
27
  ```bash
30
28
  export DATABRICKS_HOST=********.databricks.com
31
29
  export DATABRICKS_HTTP_PATH=/sql/1.0/endpoints/****************
32
- export DATABRICKS_TOKEN=dapi********************************
33
30
  ```
34
31
 
35
32
  Example usage:
@@ -39,12 +36,10 @@ from databricks import sql
39
36
 
40
37
  host = os.getenv("DATABRICKS_HOST")
41
38
  http_path = os.getenv("DATABRICKS_HTTP_PATH")
42
- access_token = os.getenv("DATABRICKS_TOKEN")
43
39
 
44
40
  connection = sql.connect(
45
41
  server_hostname=host,
46
- http_path=http_path,
47
- access_token=access_token)
42
+ http_path=http_path)
48
43
 
49
44
  cursor = connection.cursor()
50
45
  cursor.execute('SELECT :param `p`, * FROM RANGE(10)', {"param": "foo"})
@@ -60,7 +55,10 @@ In the above example:
60
55
  - `server-hostname` is the Databricks instance host name.
61
56
  - `http-path` is the HTTP Path either to a Databricks SQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef),
62
57
  or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
63
- - `personal-access-token` is the Databricks Personal Access Token for the account that will execute commands and queries
58
+
59
+ > Note: This example uses [Databricks OAuth U2M](https://docs.databricks.com/en/dev-tools/auth/oauth-u2m.html)
60
+ > to authenticate the target Databricks user account and needs to open the browser for authentication. So it
61
+ > can only run on the user's machine.
64
62
 
65
63
 
66
64
  ## Contributing
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "3.3.0"
3
+ version = "3.5.0"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -12,7 +12,7 @@ include = ["CHANGELOG.md"]
12
12
  python = "^3.8.0"
13
13
  thrift = ">=0.16.0,<0.21.0"
14
14
  pandas = [
15
- { version = ">=1.2.5,<2.2.0", python = ">=3.8" }
15
+ { version = ">=1.2.5,<2.3.0", python = ">=3.8" }
16
16
  ]
17
17
  pyarrow = ">=14.0.1,<17"
18
18
 
@@ -68,7 +68,7 @@ DATETIME = DBAPITypeObject("timestamp")
68
68
  DATE = DBAPITypeObject("date")
69
69
  ROWID = DBAPITypeObject()
70
70
 
71
- __version__ = "3.3.0"
71
+ __version__ = "3.5.0"
72
72
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
73
73
 
74
74
  # These two functions are pyhive legacy
@@ -64,7 +64,20 @@ def get_auth_provider(cfg: ClientContext):
64
64
  # no op authenticator. authentication is performed using ssl certificate outside of headers
65
65
  return AuthProvider()
66
66
  else:
67
- raise RuntimeError("No valid authentication settings!")
67
+ if (
68
+ cfg.oauth_redirect_port_range is not None
69
+ and cfg.oauth_client_id is not None
70
+ and cfg.oauth_scopes is not None
71
+ ):
72
+ return DatabricksOAuthProvider(
73
+ cfg.hostname,
74
+ cfg.oauth_persistence,
75
+ cfg.oauth_redirect_port_range,
76
+ cfg.oauth_client_id,
77
+ cfg.oauth_scopes,
78
+ )
79
+ else:
80
+ raise RuntimeError("No valid authentication settings!")
68
81
 
69
82
 
70
83
  PYSQL_OAUTH_SCOPES = ["sql", "offline_access"]
@@ -1,13 +1,11 @@
1
1
  import base64
2
2
  import logging
3
3
  import urllib.parse
4
- from typing import Dict, Union
4
+ from typing import Dict, Union, Optional
5
5
 
6
6
  import six
7
7
  import thrift
8
8
 
9
- logger = logging.getLogger(__name__)
10
-
11
9
  import ssl
12
10
  import warnings
13
11
  from http.client import HTTPResponse
@@ -16,6 +14,9 @@ from io import BytesIO
16
14
  from urllib3 import HTTPConnectionPool, HTTPSConnectionPool, ProxyManager
17
15
  from urllib3.util import make_headers
18
16
  from databricks.sql.auth.retry import CommandType, DatabricksRetryPolicy
17
+ from databricks.sql.types import SSLOptions
18
+
19
+ logger = logging.getLogger(__name__)
19
20
 
20
21
 
21
22
  class THttpClient(thrift.transport.THttpClient.THttpClient):
@@ -25,13 +26,12 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
25
26
  uri_or_host,
26
27
  port=None,
27
28
  path=None,
28
- cafile=None,
29
- cert_file=None,
30
- key_file=None,
31
- ssl_context=None,
29
+ ssl_options: Optional[SSLOptions] = None,
32
30
  max_connections: int = 1,
33
31
  retry_policy: Union[DatabricksRetryPolicy, int] = 0,
34
32
  ):
33
+ self._ssl_options = ssl_options
34
+
35
35
  if port is not None:
36
36
  warnings.warn(
37
37
  "Please use the THttpClient('http{s}://host:port/path') constructor",
@@ -48,13 +48,11 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
48
48
  self.scheme = parsed.scheme
49
49
  assert self.scheme in ("http", "https")
50
50
  if self.scheme == "https":
51
- self.certfile = cert_file
52
- self.keyfile = key_file
53
- self.context = (
54
- ssl.create_default_context(cafile=cafile)
55
- if (cafile and not ssl_context)
56
- else ssl_context
57
- )
51
+ if self._ssl_options is not None:
52
+ # TODO: Not sure if those options are used anywhere - need to double-check
53
+ self.certfile = self._ssl_options.tls_client_cert_file
54
+ self.keyfile = self._ssl_options.tls_client_cert_key_file
55
+ self.context = self._ssl_options.create_ssl_context()
58
56
  self.port = parsed.port
59
57
  self.host = parsed.hostname
60
58
  self.path = parsed.path
@@ -109,12 +107,23 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
109
107
  def open(self):
110
108
 
111
109
  # self.__pool replaces the self.__http used by the original THttpClient
110
+ _pool_kwargs = {"maxsize": self.max_connections}
111
+
112
112
  if self.scheme == "http":
113
113
  pool_class = HTTPConnectionPool
114
114
  elif self.scheme == "https":
115
115
  pool_class = HTTPSConnectionPool
116
-
117
- _pool_kwargs = {"maxsize": self.max_connections}
116
+ _pool_kwargs.update(
117
+ {
118
+ "cert_reqs": ssl.CERT_REQUIRED
119
+ if self._ssl_options.tls_verify
120
+ else ssl.CERT_NONE,
121
+ "ca_certs": self._ssl_options.tls_trusted_ca_file,
122
+ "cert_file": self._ssl_options.tls_client_cert_file,
123
+ "key_file": self._ssl_options.tls_client_cert_key_file,
124
+ "key_password": self._ssl_options.tls_client_cert_key_password,
125
+ }
126
+ )
118
127
 
119
128
  if self.using_proxy():
120
129
  proxy_manager = ProxyManager(
@@ -1,7 +1,11 @@
1
1
  from typing import Dict, Tuple, List, Optional, Any, Union, Sequence
2
2
 
3
3
  import pandas
4
- import pyarrow
4
+
5
+ try:
6
+ import pyarrow
7
+ except ImportError:
8
+ pyarrow = None
5
9
  import requests
6
10
  import json
7
11
  import os
@@ -22,6 +26,8 @@ from databricks.sql.utils import (
22
26
  ParamEscaper,
23
27
  inject_parameters,
24
28
  transform_paramstyle,
29
+ ColumnTable,
30
+ ColumnQueue,
25
31
  )
26
32
  from databricks.sql.parameters.native import (
27
33
  DbsqlParameterBase,
@@ -35,7 +41,7 @@ from databricks.sql.parameters.native import (
35
41
  )
36
42
 
37
43
 
38
- from databricks.sql.types import Row
44
+ from databricks.sql.types import Row, SSLOptions
39
45
  from databricks.sql.auth.auth import get_python_sql_connector_auth_provider
40
46
  from databricks.sql.experimental.oauth_persistence import OAuthPersistence
41
47
 
@@ -96,7 +102,7 @@ class Connection:
96
102
  sanitise parameterized inputs to prevent SQL injection. The inline parameter approach is maintained for
97
103
  legacy purposes and will be deprecated in a future release. When this parameter is `True` you will see
98
104
  a warning log message. To suppress this log message, set `use_inline_params="silent"`.
99
- auth_type: `str`, optional
105
+ auth_type: `str`, optional (default is databricks-oauth if neither `access_token` nor `tls_client_cert_file` is set)
100
106
  `databricks-oauth` : to use Databricks OAuth with fine-grained permission scopes, set to `databricks-oauth`.
101
107
  `azure-oauth` : to use Microsoft Entra ID OAuth flow, set to `azure-oauth`.
102
108
 
@@ -178,8 +184,9 @@ class Connection:
178
184
  # _tls_trusted_ca_file
179
185
  # Set to the path of the file containing trusted CA certificates for server certificate
180
186
  # verification. If not provided, uses system truststore.
181
- # _tls_client_cert_file, _tls_client_cert_key_file
187
+ # _tls_client_cert_file, _tls_client_cert_key_file, _tls_client_cert_key_password
182
188
  # Set client SSL certificate.
189
+ # See https://docs.python.org/3/library/ssl.html#ssl.SSLContext.load_cert_chain
183
190
  # _retry_stop_after_attempts_count
184
191
  # The maximum number of attempts during a request retry sequence (defaults to 24)
185
192
  # _socket_timeout
@@ -220,12 +227,25 @@ class Connection:
220
227
 
221
228
  base_headers = [("User-Agent", useragent_header)]
222
229
 
230
+ self._ssl_options = SSLOptions(
231
+ # Double negation is generally a bad thing, but we have to keep backward compatibility
232
+ tls_verify=not kwargs.get(
233
+ "_tls_no_verify", False
234
+ ), # by default - verify cert and host
235
+ tls_verify_hostname=kwargs.get("_tls_verify_hostname", True),
236
+ tls_trusted_ca_file=kwargs.get("_tls_trusted_ca_file"),
237
+ tls_client_cert_file=kwargs.get("_tls_client_cert_file"),
238
+ tls_client_cert_key_file=kwargs.get("_tls_client_cert_key_file"),
239
+ tls_client_cert_key_password=kwargs.get("_tls_client_cert_key_password"),
240
+ )
241
+
223
242
  self.thrift_backend = ThriftBackend(
224
243
  self.host,
225
244
  self.port,
226
245
  http_path,
227
246
  (http_headers or []) + base_headers,
228
247
  auth_provider,
248
+ ssl_options=self._ssl_options,
229
249
  _use_arrow_native_complex_types=_use_arrow_native_complex_types,
230
250
  **kwargs,
231
251
  )
@@ -977,14 +997,14 @@ class Cursor:
977
997
  else:
978
998
  raise Error("There is no active result set")
979
999
 
980
- def fetchall_arrow(self) -> pyarrow.Table:
1000
+ def fetchall_arrow(self) -> "pyarrow.Table":
981
1001
  self._check_not_closed()
982
1002
  if self.active_result_set:
983
1003
  return self.active_result_set.fetchall_arrow()
984
1004
  else:
985
1005
  raise Error("There is no active result set")
986
1006
 
987
- def fetchmany_arrow(self, size) -> pyarrow.Table:
1007
+ def fetchmany_arrow(self, size) -> "pyarrow.Table":
988
1008
  self._check_not_closed()
989
1009
  if self.active_result_set:
990
1010
  return self.active_result_set.fetchmany_arrow(size)
@@ -1129,6 +1149,18 @@ class ResultSet:
1129
1149
  self.results = results
1130
1150
  self.has_more_rows = has_more_rows
1131
1151
 
1152
+ def _convert_columnar_table(self, table):
1153
+ column_names = [c[0] for c in self.description]
1154
+ ResultRow = Row(*column_names)
1155
+ result = []
1156
+ for row_index in range(table.num_rows):
1157
+ curr_row = []
1158
+ for col_index in range(table.num_columns):
1159
+ curr_row.append(table.get_item(col_index, row_index))
1160
+ result.append(ResultRow(*curr_row))
1161
+
1162
+ return result
1163
+
1132
1164
  def _convert_arrow_table(self, table):
1133
1165
  column_names = [c[0] for c in self.description]
1134
1166
  ResultRow = Row(*column_names)
@@ -1164,14 +1196,14 @@ class ResultSet:
1164
1196
  timestamp_as_object=True,
1165
1197
  )
1166
1198
 
1167
- res = df.to_numpy(na_value=None)
1199
+ res = df.to_numpy(na_value=None, dtype="object")
1168
1200
  return [ResultRow(*v) for v in res]
1169
1201
 
1170
1202
  @property
1171
1203
  def rownumber(self):
1172
1204
  return self._next_row_index
1173
1205
 
1174
- def fetchmany_arrow(self, size: int) -> pyarrow.Table:
1206
+ def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
1175
1207
  """
1176
1208
  Fetch the next set of rows of a query result, returning a PyArrow table.
1177
1209
 
@@ -1196,7 +1228,49 @@ class ResultSet:
1196
1228
 
1197
1229
  return results
1198
1230
 
1199
- def fetchall_arrow(self) -> pyarrow.Table:
1231
+ def merge_columnar(self, result1, result2):
1232
+ """
1233
+ Merge two columnar results into a single result
1234
+ :param result1:
1235
+ :param result2:
1236
+ :return:
1237
+ """
1238
+
1239
+ if result1.column_names != result2.column_names:
1240
+ raise ValueError("The columns in the results don't match")
1241
+
1242
+ merged_result = [
1243
+ result1.column_table[i] + result2.column_table[i]
1244
+ for i in range(result1.num_columns)
1245
+ ]
1246
+ return ColumnTable(merged_result, result1.column_names)
1247
+
1248
+ def fetchmany_columnar(self, size: int):
1249
+ """
1250
+ Fetch the next set of rows of a query result, returning a Columnar Table.
1251
+ An empty sequence is returned when no more rows are available.
1252
+ """
1253
+ if size < 0:
1254
+ raise ValueError("size argument for fetchmany is %s but must be >= 0", size)
1255
+
1256
+ results = self.results.next_n_rows(size)
1257
+ n_remaining_rows = size - results.num_rows
1258
+ self._next_row_index += results.num_rows
1259
+
1260
+ while (
1261
+ n_remaining_rows > 0
1262
+ and not self.has_been_closed_server_side
1263
+ and self.has_more_rows
1264
+ ):
1265
+ self._fill_results_buffer()
1266
+ partial_results = self.results.next_n_rows(n_remaining_rows)
1267
+ results = self.merge_columnar(results, partial_results)
1268
+ n_remaining_rows -= partial_results.num_rows
1269
+ self._next_row_index += partial_results.num_rows
1270
+
1271
+ return results
1272
+
1273
+ def fetchall_arrow(self) -> "pyarrow.Table":
1200
1274
  """Fetch all (remaining) rows of a query result, returning them as a PyArrow table."""
1201
1275
  results = self.results.remaining_rows()
1202
1276
  self._next_row_index += results.num_rows
@@ -1209,12 +1283,30 @@ class ResultSet:
1209
1283
 
1210
1284
  return results
1211
1285
 
1286
+ def fetchall_columnar(self):
1287
+ """Fetch all (remaining) rows of a query result, returning them as a Columnar table."""
1288
+ results = self.results.remaining_rows()
1289
+ self._next_row_index += results.num_rows
1290
+
1291
+ while not self.has_been_closed_server_side and self.has_more_rows:
1292
+ self._fill_results_buffer()
1293
+ partial_results = self.results.remaining_rows()
1294
+ results = self.merge_columnar(results, partial_results)
1295
+ self._next_row_index += partial_results.num_rows
1296
+
1297
+ return results
1298
+
1212
1299
  def fetchone(self) -> Optional[Row]:
1213
1300
  """
1214
1301
  Fetch the next row of a query result set, returning a single sequence,
1215
1302
  or None when no more data is available.
1216
1303
  """
1217
- res = self._convert_arrow_table(self.fetchmany_arrow(1))
1304
+
1305
+ if isinstance(self.results, ColumnQueue):
1306
+ res = self._convert_columnar_table(self.fetchmany_columnar(1))
1307
+ else:
1308
+ res = self._convert_arrow_table(self.fetchmany_arrow(1))
1309
+
1218
1310
  if len(res) > 0:
1219
1311
  return res[0]
1220
1312
  else:
@@ -1224,7 +1316,10 @@ class ResultSet:
1224
1316
  """
1225
1317
  Fetch all (remaining) rows of a query result, returning them as a list of rows.
1226
1318
  """
1227
- return self._convert_arrow_table(self.fetchall_arrow())
1319
+ if isinstance(self.results, ColumnQueue):
1320
+ return self._convert_columnar_table(self.fetchall_columnar())
1321
+ else:
1322
+ return self._convert_arrow_table(self.fetchall_arrow())
1228
1323
 
1229
1324
  def fetchmany(self, size: int) -> List[Row]:
1230
1325
  """
@@ -1232,7 +1327,10 @@ class ResultSet:
1232
1327
 
1233
1328
  An empty sequence is returned when no more rows are available.
1234
1329
  """
1235
- return self._convert_arrow_table(self.fetchmany_arrow(size))
1330
+ if isinstance(self.results, ColumnQueue):
1331
+ return self._convert_columnar_table(self.fetchmany_columnar(size))
1332
+ else:
1333
+ return self._convert_arrow_table(self.fetchmany_arrow(size))
1236
1334
 
1237
1335
  def close(self) -> None:
1238
1336
  """
@@ -1,6 +1,5 @@
1
1
  import logging
2
2
 
3
- from ssl import SSLContext
4
3
  from concurrent.futures import ThreadPoolExecutor, Future
5
4
  from typing import List, Union
6
5
 
@@ -9,6 +8,8 @@ from databricks.sql.cloudfetch.downloader import (
9
8
  DownloadableResultSettings,
10
9
  DownloadedFile,
11
10
  )
11
+ from databricks.sql.types import SSLOptions
12
+
12
13
  from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
13
14
 
14
15
  logger = logging.getLogger(__name__)
@@ -20,7 +21,7 @@ class ResultFileDownloadManager:
20
21
  links: List[TSparkArrowResultLink],
21
22
  max_download_threads: int,
22
23
  lz4_compressed: bool,
23
- ssl_context: SSLContext,
24
+ ssl_options: SSLOptions,
24
25
  ):
25
26
  self._pending_links: List[TSparkArrowResultLink] = []
26
27
  for link in links:
@@ -38,7 +39,7 @@ class ResultFileDownloadManager:
38
39
  self._thread_pool = ThreadPoolExecutor(max_workers=self._max_download_threads)
39
40
 
40
41
  self._downloadable_result_settings = DownloadableResultSettings(lz4_compressed)
41
- self._ssl_context = ssl_context
42
+ self._ssl_options = ssl_options
42
43
 
43
44
  def get_next_downloaded_file(
44
45
  self, next_row_offset: int
@@ -95,7 +96,7 @@ class ResultFileDownloadManager:
95
96
  handler = ResultSetDownloadHandler(
96
97
  settings=self._downloadable_result_settings,
97
98
  link=link,
98
- ssl_context=self._ssl_context,
99
+ ssl_options=self._ssl_options,
99
100
  )
100
101
  task = self._thread_pool.submit(handler.run)
101
102
  self._download_tasks.append(task)
@@ -3,13 +3,12 @@ from dataclasses import dataclass
3
3
 
4
4
  import requests
5
5
  from requests.adapters import HTTPAdapter, Retry
6
- from ssl import SSLContext, CERT_NONE
7
6
  import lz4.frame
8
7
  import time
9
8
 
10
9
  from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
11
-
12
10
  from databricks.sql.exc import Error
11
+ from databricks.sql.types import SSLOptions
13
12
 
14
13
  logger = logging.getLogger(__name__)
15
14
 
@@ -66,11 +65,11 @@ class ResultSetDownloadHandler:
66
65
  self,
67
66
  settings: DownloadableResultSettings,
68
67
  link: TSparkArrowResultLink,
69
- ssl_context: SSLContext,
68
+ ssl_options: SSLOptions,
70
69
  ):
71
70
  self.settings = settings
72
71
  self.link = link
73
- self._ssl_context = ssl_context
72
+ self._ssl_options = ssl_options
74
73
 
75
74
  def run(self) -> DownloadedFile:
76
75
  """
@@ -95,14 +94,13 @@ class ResultSetDownloadHandler:
95
94
  session.mount("http://", HTTPAdapter(max_retries=retryPolicy))
96
95
  session.mount("https://", HTTPAdapter(max_retries=retryPolicy))
97
96
 
98
- ssl_verify = self._ssl_context.verify_mode != CERT_NONE
99
-
100
97
  try:
101
98
  # Get the file via HTTP request
102
99
  response = session.get(
103
100
  self.link.fileLink,
104
101
  timeout=self.settings.download_timeout,
105
- verify=ssl_verify,
102
+ verify=self._ssl_options.tls_verify,
103
+ # TODO: Pass cert from `self._ssl_options`
106
104
  )
107
105
  response.raise_for_status()
108
106