databricks-sql-connector 3.0.3b1__tar.gz → 3.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/CHANGELOG.md +38 -8
  2. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/PKG-INFO +2 -1
  3. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/pyproject.toml +2 -1
  4. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/__init__.py +34 -1
  5. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/auth/auth.py +10 -7
  6. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/auth/authenticators.py +4 -6
  7. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/auth/endpoint.py +30 -7
  8. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/auth/retry.py +33 -41
  9. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/client.py +25 -11
  10. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/thrift_backend.py +2 -2
  11. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/base.py +1 -0
  12. databricks_sql_connector-3.1.1/src/databricks/sqlalchemy/test_local/conftest.py +44 -0
  13. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test_local/e2e/test_basic.py +26 -27
  14. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test_local/test_parsing.py +4 -4
  15. databricks_sql_connector-3.0.3b1/src/databricks/sqlalchemy/_pytest.ini +0 -3
  16. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/LICENSE +0 -0
  17. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/README.md +0 -0
  18. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/__init__.py +0 -0
  19. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/auth/__init__.py +0 -0
  20. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/auth/oauth.py +0 -0
  21. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  22. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/auth/thrift_http_client.py +0 -0
  23. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
  24. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/cloudfetch/downloader.py +0 -0
  25. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/exc.py +0 -0
  26. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/experimental/__init__.py +0 -0
  27. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  28. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/parameters/__init__.py +0 -0
  29. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/parameters/native.py +0 -0
  30. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/parameters/py.typed +0 -0
  31. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  32. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  33. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  34. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  35. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  36. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/thrift_api/__init__.py +0 -0
  37. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/types.py +0 -0
  38. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sql/utils.py +0 -0
  39. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/README.sqlalchemy.md +0 -0
  40. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/README.tests.md +0 -0
  41. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/__init__.py +0 -0
  42. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/_ddl.py +0 -0
  43. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/_parse.py +0 -0
  44. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/_types.py +0 -0
  45. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/requirements.py +0 -0
  46. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/setup.cfg +0 -0
  47. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test/_extra.py +0 -0
  48. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test/_future.py +0 -0
  49. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test/_regression.py +0 -0
  50. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test/_unsupported.py +0 -0
  51. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test/conftest.py +0 -0
  52. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +0 -0
  53. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test/overrides/_ctetest.py +0 -0
  54. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test/test_suite.py +0 -0
  55. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test_local/__init__.py +0 -0
  56. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  57. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test_local/test_ddl.py +0 -0
  58. {databricks_sql_connector-3.0.3b1 → databricks_sql_connector-3.1.1}/src/databricks/sqlalchemy/test_local/test_types.py +0 -0
@@ -1,8 +1,23 @@
1
1
  # Release History
2
2
 
3
- # 3.0.3b1 (2024-01-29)
3
+ # x.x.x (TBD)
4
4
 
5
- - Fix: Thrift connections would fail if installed `urllib3` was below version `1.26.0`
5
+ # 3.1.1 (2024-03-19)
6
+
7
+ - Don't retry requests that fail with code 403 (#373)
8
+ - Assume a default retry-after for 429/503 (#371)
9
+ - Fix boolean literals (#357)
10
+
11
+ # 3.1.0 (2024-02-16)
12
+
13
+ - Revert retry-after behavior to be exponential backoff (#349)
14
+ - Support Databricks OAuth on Azure (#351)
15
+ - Support Databricks OAuth on GCP (#338)
16
+
17
+ # 3.0.3 (2024-02-02)
18
+
19
+ - Revised docstrings and examples for OAuth (#339)
20
+ - Redact the URL query parameters from the urllib3.connectionpool logs (#341)
6
21
 
7
22
  # 3.0.2 (2024-01-25)
8
23
 
@@ -48,7 +63,7 @@
48
63
 
49
64
  ## 2.9.2 (2023-08-17)
50
65
 
51
- __Note: this release was yanked from Pypi on 13 September 2023 due to compatibility issues with environments where `urllib3<=2.0.0` were installed. The log changes are incorporated into version 2.9.3 and greater.__
66
+ **Note: this release was yanked from Pypi on 13 September 2023 due to compatibility issues with environments where `urllib3<=2.0.0` were installed. The log changes are incorporated into version 2.9.3 and greater.**
52
67
 
53
68
  - Other: Add `examples/v3_retries_query_execute.py` (#199)
54
69
  - Other: suppress log message when `_enable_v3_retries` is not `True` (#199)
@@ -56,7 +71,7 @@ __Note: this release was yanked from Pypi on 13 September 2023 due to compatibil
56
71
 
57
72
  ## 2.9.1 (2023-08-11)
58
73
 
59
- __Note: this release was yanked from Pypi on 13 September 2023 due to compatibility issues with environments where `urllib3<=2.0.0` were installed.__
74
+ **Note: this release was yanked from Pypi on 13 September 2023 due to compatibility issues with environments where `urllib3<=2.0.0` were installed.**
60
75
 
61
76
  - Other: Explicitly pin urllib3 to ^2.0.0 (#191)
62
77
 
@@ -109,6 +124,7 @@ __Note: this release was yanked from Pypi on 13 September 2023 due to compatibil
109
124
  - Other: Relax sqlalchemy required version as it was unnecessarily strict.
110
125
 
111
126
  ## 2.5.0 (2023-04-14)
127
+
112
128
  - Add support for External Auth providers
113
129
  - Fix: Python HTTP proxies were broken
114
130
  - Other: All Thrift requests that timeout during connection will be automatically retried
@@ -130,8 +146,8 @@ __Note: this release was yanked from Pypi on 13 September 2023 due to compatibil
130
146
 
131
147
  ## 2.2.2 (2023-01-03)
132
148
 
133
- - Support custom oauth client id and redirect port
134
- - Fix: Add none check on _oauth_persistence in DatabricksOAuthProvider
149
+ - Support custom oauth client id and redirect port
150
+ - Fix: Add none check on \_oauth_persistence in DatabricksOAuthProvider
135
151
 
136
152
  ## 2.2.1 (2022-11-29)
137
153
 
@@ -163,57 +179,71 @@ Huge thanks to @dbaxa for contributing this change!
163
179
 
164
180
  - Add retry logic for `GetOperationStatus` requests that fail with an `OSError`
165
181
  - Reorganised code to use Poetry for dependency management.
182
+
166
183
  ## 2.0.2 (2022-05-04)
184
+
167
185
  - Better exception handling in automatic connection close
168
186
 
169
187
  ## 2.0.1 (2022-04-21)
188
+
170
189
  - Fixed Pandas dependency in setup.cfg to be >= 1.2.0
171
190
 
172
191
  ## 2.0.0 (2022-04-19)
192
+
173
193
  - Initial stable release of V2
174
- - Added better support for complex types, so that in Databricks runtime 10.3+, Arrays, Maps and Structs will get
194
+ - Added better support for complex types, so that in Databricks runtime 10.3+, Arrays, Maps and Structs will get
175
195
  deserialized as lists, lists of tuples and dicts, respectively.
176
196
  - Changed the name of the metadata arg to http_headers
177
197
 
178
198
  ## 2.0.b2 (2022-04-04)
199
+
179
200
  - Change import of collections.Iterable to collections.abc.Iterable to make the library compatible with Python 3.10
180
201
  - Fixed bug with .tables method so that .tables works as expected with Unity-Catalog enabled endpoints
181
202
 
182
203
  ## 2.0.0b1 (2022-03-04)
204
+
183
205
  - Fix packaging issue (dependencies were not being installed properly)
184
206
  - Fetching timestamp results will now return aware instead of naive timestamps
185
207
  - The client will now default to using simplified error messages
186
208
 
187
209
  ## 2.0.0b (2022-02-08)
210
+
188
211
  - Initial beta release of V2. V2 is an internal re-write of large parts of the connector to use Databricks edge features. All public APIs from V1 remain.
189
- - Added Unity Catalog support (pass catalog and / or schema key word args to the .connect method to select initial schema and catalog)
212
+ - Added Unity Catalog support (pass catalog and / or schema key word args to the .connect method to select initial schema and catalog)
190
213
 
191
214
  ---
192
215
 
193
216
  **Note**: The code for versions prior to `v2.0.0b` is not contained in this repository. The below entries are included for reference only.
194
217
 
195
218
  ---
219
+
196
220
  ## 1.0.0 (2022-01-20)
221
+
197
222
  - Add operations for retrieving metadata
198
223
  - Add the ability to access columns by name on result rows
199
224
  - Add the ability to provide configuration settings on connect
200
225
 
201
226
  ## 0.9.4 (2022-01-10)
227
+
202
228
  - Improved logging and error messages.
203
229
 
204
230
  ## 0.9.3 (2021-12-08)
231
+
205
232
  - Add retries for 429 and 503 HTTP responses.
206
233
 
207
234
  ## 0.9.2 (2021-12-02)
235
+
208
236
  - (Bug fix) Increased Thrift requirement from 0.10.0 to 0.13.0 as 0.10.0 was in fact incompatible
209
237
  - (Bug fix) Fixed error message after query execution failed - SQLSTATE and Error message were misplaced
210
238
 
211
239
  ## 0.9.1 (2021-09-01)
240
+
212
241
  - Public Preview release, Experimental tag removed
213
242
  - minor updates in internal build/packaging
214
243
  - no functional changes
215
244
 
216
245
  ## 0.9.0 (2021-08-04)
246
+
217
247
  - initial (Experimental) release of pyhive-forked connector
218
248
  - Python DBAPI 2.0 (PEP-0249), thrift based
219
249
  - see docs for more info: https://docs.databricks.com/dev-tools/python-sql-connector.html
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: databricks-sql-connector
3
- Version: 3.0.3b1
3
+ Version: 3.1.1
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  Author: Databricks
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.8
12
12
  Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
15
16
  Provides-Extra: alembic
16
17
  Provides-Extra: sqlalchemy
17
18
  Requires-Dist: alembic (>=1.0.11,<2.0.0) ; extra == "alembic"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "3.0.3b1"
3
+ version = "3.1.1"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -58,6 +58,7 @@ exclude = ['ttypes\.py$', 'TCLIService\.py$']
58
58
  exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/'
59
59
 
60
60
  [tool.pytest.ini_options]
61
+ markers = {"reviewed" = "Test case has been reviewed by Databricks"}
61
62
  minversion = "6.0"
62
63
  log_cli = "false"
63
64
  log_cli_level = "INFO"
@@ -8,6 +8,39 @@ threadsafety = 1 # Threads may share the module, but not connections.
8
8
 
9
9
  paramstyle = "named"
10
10
 
11
+ import re
12
+
13
+
14
class RedactUrlQueryParamsFilter(logging.Filter):
    """Logging filter that masks URL query-parameter values in log records.

    Every ``?key=value`` or ``&key=value`` occurrence found in the record's
    message or in its string arguments is rewritten to ``key=<REDACTED>`` so
    that values carried in query strings do not end up in log output.
    Non-string arguments are passed through untouched so ``%d``-style
    formatting keeps working.
    """

    # Group 1: the separator (``?`` or ``&``), group 2: the parameter name,
    # group 3: the value (everything up to the next ``&``).
    pattern = re.compile(r"(\?|&)([\w-]+)=([^&]+)")
    # Keep separator and name, replace only the value.
    mask = r"\1\2=<REDACTED>"

    def __init__(self):
        super().__init__()

    def redact(self, string):
        """Return ``str(string)`` with all query-parameter values masked."""
        return re.sub(self.pattern, self.mask, str(string))

    def _redact_if_str(self, value):
        """Redact ``value`` when it is a ``str``; return anything else as is."""
        return self.redact(value) if isinstance(value, str) else value

    def filter(self, record):
        """Redact the record's message and arguments; never drops the record.

        Args:
            record: the ``logging.LogRecord`` about to be emitted.

        Returns:
            Always ``True`` - the record is modified, not suppressed.
        """
        record.msg = self.redact(record.msg)

        args = record.args
        if isinstance(args, dict):
            # Mapping-style args (``"%(url)s" % {...}``). Build a fresh dict so
            # the dictionary object owned by the caller is left unmodified.
            record.args = {
                key: self._redact_if_str(value) for key, value in args.items()
            }
        elif args:
            # Positional args. ``None`` and ``()`` need no processing.
            record.args = tuple(self._redact_if_str(arg) for arg in args)

        return True
40
+
41
+
42
+ logging.getLogger("urllib3.connectionpool").addFilter(RedactUrlQueryParamsFilter())
43
+
11
44
 
12
45
  class DBAPITypeObject(object):
13
46
  def __init__(self, *values):
@@ -29,7 +62,7 @@ DATETIME = DBAPITypeObject("timestamp")
29
62
  DATE = DBAPITypeObject("date")
30
63
  ROWID = DBAPITypeObject()
31
64
 
32
- __version__ = "3.0.3b1"
65
+ __version__ = "3.1.1"
33
66
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
34
67
 
35
68
  # These two functions are pyhive legacy
@@ -8,12 +8,11 @@ from databricks.sql.auth.authenticators import (
8
8
  ExternalAuthProvider,
9
9
  DatabricksOAuthProvider,
10
10
  )
11
- from databricks.sql.auth.endpoint import infer_cloud_from_host, CloudType
12
- from databricks.sql.experimental.oauth_persistence import OAuthPersistence
13
11
 
14
12
 
15
13
  class AuthType(Enum):
16
14
  DATABRICKS_OAUTH = "databricks-oauth"
15
+ AZURE_OAUTH = "azure-oauth"
17
16
  # other supported types (access_token, user/pass) can be inferred
18
17
  # we can add more types as needed later
19
18
 
@@ -51,7 +50,7 @@ class ClientContext:
51
50
  def get_auth_provider(cfg: ClientContext):
52
51
  if cfg.credentials_provider:
53
52
  return ExternalAuthProvider(cfg.credentials_provider)
54
- if cfg.auth_type == AuthType.DATABRICKS_OAUTH.value:
53
+ if cfg.auth_type in [AuthType.DATABRICKS_OAUTH.value, AuthType.AZURE_OAUTH.value]:
55
54
  assert cfg.oauth_redirect_port_range is not None
56
55
  assert cfg.oauth_client_id is not None
57
56
  assert cfg.oauth_scopes is not None
@@ -62,6 +61,7 @@ def get_auth_provider(cfg: ClientContext):
62
61
  cfg.oauth_redirect_port_range,
63
62
  cfg.oauth_client_id,
64
63
  cfg.oauth_scopes,
64
+ cfg.auth_type,
65
65
  )
66
66
  elif cfg.access_token is not None:
67
67
  return AccessTokenAuthProvider(cfg.access_token)
@@ -87,19 +87,22 @@ def normalize_host_name(hostname: str):
87
87
  return f"{maybe_scheme}{hostname}{maybe_trailing_slash}"
88
88
 
89
89
 
90
- def get_client_id_and_redirect_port(hostname: str):
90
+ def get_client_id_and_redirect_port(use_azure_auth: bool):
91
91
  return (
92
92
  (PYSQL_OAUTH_CLIENT_ID, PYSQL_OAUTH_REDIRECT_PORT_RANGE)
93
- if infer_cloud_from_host(hostname) == CloudType.AWS
93
+ if not use_azure_auth
94
94
  else (PYSQL_OAUTH_AZURE_CLIENT_ID, PYSQL_OAUTH_AZURE_REDIRECT_PORT_RANGE)
95
95
  )
96
96
 
97
97
 
98
98
  def get_python_sql_connector_auth_provider(hostname: str, **kwargs):
99
- (client_id, redirect_port_range) = get_client_id_and_redirect_port(hostname)
99
+ auth_type = kwargs.get("auth_type")
100
+ (client_id, redirect_port_range) = get_client_id_and_redirect_port(
101
+ auth_type == AuthType.AZURE_OAUTH.value
102
+ )
100
103
  cfg = ClientContext(
101
104
  hostname=normalize_host_name(hostname),
102
- auth_type=kwargs.get("auth_type"),
105
+ auth_type=auth_type,
103
106
  access_token=kwargs.get("access_token"),
104
107
  username=kwargs.get("_username"),
105
108
  password=kwargs.get("_password"),
@@ -18,6 +18,7 @@ class AuthProvider:
18
18
 
19
19
  HeaderFactory = Callable[[], Dict[str, str]]
20
20
 
21
+
21
22
  # In order to keep compatibility with SDK
22
23
  class CredentialsProvider(abc.ABC):
23
24
  """CredentialsProvider is the protocol (call-side interface)
@@ -69,16 +70,13 @@ class DatabricksOAuthProvider(AuthProvider):
69
70
  redirect_port_range: List[int],
70
71
  client_id: str,
71
72
  scopes: List[str],
73
+ auth_type: str = "databricks-oauth",
72
74
  ):
73
75
  try:
74
- cloud_type = infer_cloud_from_host(hostname)
75
- if not cloud_type:
76
- raise NotImplementedError("Cannot infer the cloud type from hostname")
77
-
78
- idp_endpoint = get_oauth_endpoints(cloud_type)
76
+ idp_endpoint = get_oauth_endpoints(hostname, auth_type == "azure-oauth")
79
77
  if not idp_endpoint:
80
78
  raise NotImplementedError(
81
- f"OAuth is not supported for cloud ${cloud_type.value}"
79
+ f"OAuth is not supported for host ${hostname}"
82
80
  )
83
81
 
84
82
  # Convert to the corresponding scopes in the corresponding IdP
@@ -1,9 +1,9 @@
1
1
  #
2
2
  # It implements all the cloud specific OAuth configuration/metadata
3
3
  #
4
- # Azure: It uses AAD
4
+ # Azure: It uses Databricks internal IdP or Azure AD
5
5
  # AWS: It uses Databricks internal IdP
6
- # GCP: Not support yet
6
+ # GCP: It uses Databricks internal IdP
7
7
  #
8
8
  from abc import ABC, abstractmethod
9
9
  from enum import Enum
@@ -21,6 +21,7 @@ class OAuthScope:
21
21
  class CloudType(Enum):
22
22
  AWS = "aws"
23
23
  AZURE = "azure"
24
+ GCP = "gcp"
24
25
 
25
26
 
26
27
  DATABRICKS_AWS_DOMAINS = [
@@ -34,6 +35,10 @@ DATABRICKS_AZURE_DOMAINS = [
34
35
  ".databricks.azure.cn",
35
36
  ".databricks.azure.us",
36
37
  ]
38
+ DATABRICKS_GCP_DOMAINS = [".gcp.databricks.com"]
39
+
40
+ # Domain supported by Databricks InHouse OAuth
41
+ DATABRICKS_OAUTH_AZURE_DOMAINS = [".azuredatabricks.net"]
37
42
 
38
43
 
39
44
  # Infer cloud type from Databricks SQL instance hostname
@@ -45,10 +50,20 @@ def infer_cloud_from_host(hostname: str) -> Optional[CloudType]:
45
50
  return CloudType.AZURE
46
51
  elif any(e for e in DATABRICKS_AWS_DOMAINS if host.endswith(e)):
47
52
  return CloudType.AWS
53
+ elif any(e for e in DATABRICKS_GCP_DOMAINS if host.endswith(e)):
54
+ return CloudType.GCP
48
55
  else:
49
56
  return None
50
57
 
51
58
 
59
def is_supported_databricks_oauth_host(hostname: str) -> bool:
    """Tell whether ``hostname`` ends with one of the known OAuth-capable domains.

    The hostname is lower-cased, a ``https://`` scheme is removed and anything
    from the first ``/`` onwards is discarded before it is compared against
    the AWS, GCP and Azure (Databricks OAuth) domain suffix lists.
    """
    bare_host = hostname.lower().replace("https://", "").split("/")[0]
    candidate_suffixes = [
        *DATABRICKS_AWS_DOMAINS,
        *DATABRICKS_GCP_DOMAINS,
        *DATABRICKS_OAUTH_AZURE_DOMAINS,
    ]
    for suffix in candidate_suffixes:
        # An empty suffix never counts as a match.
        if suffix and bare_host.endswith(suffix):
            return True
    return False
65
+
66
+
52
67
  def get_databricks_oidc_url(hostname: str):
53
68
  maybe_scheme = "https://" if not hostname.startswith("https://") else ""
54
69
  maybe_trailing_slash = "/" if not hostname.endswith("/") else ""
@@ -94,7 +109,7 @@ class AzureOAuthEndpointCollection(OAuthEndpointCollection):
94
109
  return "https://login.microsoftonline.com/organizations/v2.0/.well-known/openid-configuration"
95
110
 
96
111
 
97
- class AwsOAuthEndpointCollection(OAuthEndpointCollection):
112
+ class InHouseOAuthEndpointCollection(OAuthEndpointCollection):
98
113
  def get_scopes_mapping(self, scopes: List[str]) -> List[str]:
99
114
  # No scope mapping in AWS
100
115
  return scopes.copy()
@@ -108,10 +123,18 @@ class AwsOAuthEndpointCollection(OAuthEndpointCollection):
108
123
  return f"{idp_url}/.well-known/oauth-authorization-server"
109
124
 
110
125
 
111
- def get_oauth_endpoints(cloud: CloudType) -> Optional[OAuthEndpointCollection]:
112
- if cloud == CloudType.AWS:
113
- return AwsOAuthEndpointCollection()
126
+ def get_oauth_endpoints(
127
+ hostname: str, use_azure_auth: bool
128
+ ) -> Optional[OAuthEndpointCollection]:
129
+ cloud = infer_cloud_from_host(hostname)
130
+
131
+ if cloud in [CloudType.AWS, CloudType.GCP]:
132
+ return InHouseOAuthEndpointCollection()
114
133
  elif cloud == CloudType.AZURE:
115
- return AzureOAuthEndpointCollection()
134
+ return (
135
+ InHouseOAuthEndpointCollection()
136
+ if is_supported_databricks_oauth_host(hostname) and not use_azure_auth
137
+ else AzureOAuthEndpointCollection()
138
+ )
116
139
  else:
117
140
  return None
@@ -118,13 +118,13 @@ class DatabricksRetryPolicy(Retry):
118
118
  _total: int = urllib3_kwargs.pop("total")
119
119
  _attempts_remaining = _total
120
120
 
121
- _urllib_kwargs_we_care_about = {
122
- "total": _attempts_remaining,
123
- "respect_retry_after_header": True,
124
- "backoff_factor": self.delay_min,
125
- self._allowed_methods_alias: ["POST"],
126
- "status_forcelist": [429, 503, *self.force_dangerous_codes],
127
- }
121
+ _urllib_kwargs_we_care_about = dict(
122
+ total=_attempts_remaining,
123
+ respect_retry_after_header=True,
124
+ backoff_factor=self.delay_min,
125
+ allowed_methods=["POST"],
126
+ status_forcelist=[429, 503, *self.force_dangerous_codes],
127
+ )
128
128
 
129
129
  urllib3_kwargs.update(**_urllib_kwargs_we_care_about)
130
130
 
@@ -132,22 +132,6 @@ class DatabricksRetryPolicy(Retry):
132
132
  **urllib3_kwargs, # type: ignore
133
133
  )
134
134
 
135
- @property
136
- def _allowed_methods_alias(self) -> str:
137
- """Returns `allowed_methods` if installed urllib3 is >=1.26.0
138
- Returns `method_whitelist` otherwise."""
139
-
140
- if not hasattr(self, "__allowed_methods_alias"):
141
- import urllib3
142
- from distutils.version import LooseVersion
143
-
144
- if LooseVersion(urllib3.__version__) >= LooseVersion("1.26"):
145
- self.__allowed_methods_alias = "allowed_methods"
146
- else:
147
- self.__allowed_methods_alias = "method_whitelist"
148
-
149
- return self.__allowed_methods_alias
150
-
151
135
  @classmethod
152
136
  def __private_init__(
153
137
  cls, retry_start_time: float, command_type: Optional[CommandType], **init_kwargs
@@ -217,22 +201,22 @@ class DatabricksRetryPolicy(Retry):
217
201
  # Gather urllib3's current retry state _before_ increment was called
218
202
  # These arguments match the function signature for Retry.__init__
219
203
  # Note: if we update urllib3 we may need to add/remove arguments from this dict
220
- urllib3_init_params = {
221
- "total": self.total,
222
- "connect": self.connect,
223
- "read": self.read,
224
- "redirect": self.redirect,
225
- "status": self.status,
226
- "other": self.other,
227
- self._allowed_methods_alias: self.allowed_methods,
228
- "status_forcelist": self.status_forcelist,
229
- "backoff_factor": self.backoff_factor, # type: ignore
230
- "raise_on_redirect": self.raise_on_redirect,
231
- "raise_on_status": self.raise_on_status,
232
- "history": self.history,
233
- "remove_headers_on_redirect": self.remove_headers_on_redirect,
234
- "respect_retry_after_header": self.respect_retry_after_header,
235
- }
204
+ urllib3_init_params = dict(
205
+ total=self.total,
206
+ connect=self.connect,
207
+ read=self.read,
208
+ redirect=self.redirect,
209
+ status=self.status,
210
+ other=self.other,
211
+ allowed_methods=self.allowed_methods,
212
+ status_forcelist=self.status_forcelist,
213
+ backoff_factor=self.backoff_factor, # type: ignore
214
+ raise_on_redirect=self.raise_on_redirect,
215
+ raise_on_status=self.raise_on_status,
216
+ history=self.history,
217
+ remove_headers_on_redirect=self.remove_headers_on_redirect,
218
+ respect_retry_after_header=self.respect_retry_after_header,
219
+ )
236
220
 
237
221
  # Update urllib3's current state to reflect the incremented counters
238
222
  urllib3_init_params.update(**urllib3_incremented_counters)
@@ -299,8 +283,10 @@ class DatabricksRetryPolicy(Retry):
299
283
  """
300
284
  retry_after = self.get_retry_after(response)
301
285
  if retry_after:
302
- self.check_proposed_wait(retry_after)
303
- time.sleep(retry_after)
286
+ backoff = self.get_backoff_time()
287
+ proposed_wait = max(backoff, retry_after)
288
+ self.check_proposed_wait(proposed_wait)
289
+ time.sleep(proposed_wait)
304
290
  return True
305
291
 
306
292
  return False
@@ -339,6 +325,7 @@ class DatabricksRetryPolicy(Retry):
339
325
  default, this means ExecuteStatement is only retried for codes 429 and 503.
340
326
  This limit prevents automatically retrying non-idempotent commands that could
341
327
  be destructive.
328
+ 5. The request received a 403 response, because this can never succeed.
342
329
 
343
330
 
344
331
  Q: What about OSErrors and Redirects?
@@ -352,6 +339,11 @@ class DatabricksRetryPolicy(Retry):
352
339
  if status_code == 200:
353
340
  return False, "200 codes are not retried"
354
341
 
342
+ if status_code == 403:
343
+ raise NonRecoverableNetworkError(
344
+ "Received 403 - FORBIDDEN. Confirm your authentication credentials."
345
+ )
346
+
355
347
  # Request failed and server said NotImplemented. This isn't recoverable. Don't retry.
356
348
  if status_code == 501:
357
349
  raise NonRecoverableNetworkError("Received code 501 from server.")
@@ -6,6 +6,7 @@ import requests
6
6
  import json
7
7
  import os
8
8
  import decimal
9
+ from uuid import UUID
9
10
 
10
11
  from databricks.sql import __version__
11
12
  from databricks.sql import *
@@ -96,11 +97,8 @@ class Connection:
96
97
  legacy purposes and will be deprecated in a future release. When this parameter is `True` you will see
97
98
  a warning log message. To suppress this log message, set `use_inline_params="silent"`.
98
99
  auth_type: `str`, optional
99
- `databricks-oauth` : to use oauth with fine-grained permission scopes, set to `databricks-oauth`.
100
- This is currently in private preview for Databricks accounts on AWS.
101
- This supports User to Machine OAuth authentication for Databricks on AWS with
102
- any IDP configured. This is only for interactive python applications and open a browser window.
103
- Note this is beta (private preview)
100
+ `databricks-oauth` : to use Databricks OAuth with fine-grained permission scopes, set to `databricks-oauth`.
101
+ `azure-oauth` : to use Microsoft Entra ID OAuth flow, set to `azure-oauth`.
104
102
 
105
103
  oauth_client_id: `str`, optional
106
104
  custom oauth client_id. If not specified, it will use the built-in client_id of databricks-sql-python.
@@ -111,9 +109,9 @@ class Connection:
111
109
 
112
110
  experimental_oauth_persistence: configures preferred storage for persisting oauth tokens.
113
111
  This has to be a class implementing `OAuthPersistence`.
114
- When `auth_type` is set to `databricks-oauth` without persisting the oauth token in a persistence storage
115
- the oauth tokens will only be maintained in memory and if the python process restarts the end user
116
- will have to login again.
112
+ When `auth_type` is set to `databricks-oauth` or `azure-oauth` without persisting the oauth token in a
113
+ persistence storage the oauth tokens will only be maintained in memory and if the python process
114
+ restarts the end user will have to login again.
117
115
  Note this is beta (private preview)
118
116
 
119
117
  For persisting the oauth token in a prod environment you should subclass and implement OAuthPersistence
@@ -609,12 +607,15 @@ class Cursor:
609
607
  "Local file operations are restricted to paths within the configured staging_allowed_local_path"
610
608
  )
611
609
 
612
- # TODO: Experiment with DBR sending real headers.
613
- # The specification says headers will be in JSON format but the current null value is actually an empty list []
610
+ # May be real headers, or could be json string
611
+ headers = (
612
+ json.loads(row.headers) if isinstance(row.headers, str) else row.headers
613
+ )
614
+
614
615
  handler_args = {
615
616
  "presigned_url": row.presignedUrl,
616
617
  "local_file": abs_localFile,
617
- "headers": json.loads(row.headers or "{}"),
618
+ "headers": dict(headers) or {},
618
619
  }
619
620
 
620
621
  logger.debug(
@@ -1004,9 +1005,22 @@ class Cursor:
1004
1005
  def close(self) -> None:
1005
1006
  """Close cursor"""
1006
1007
  self.open = False
1008
+ self.active_op_handle = None
1007
1009
  if self.active_result_set:
1008
1010
  self._close_and_clear_active_result_set()
1009
1011
 
1012
@property
def query_id(self) -> Optional[str]:
    """
    Identifier of the last executed query, formatted as a UUID string.

    The value is built from the 16-byte ``operationId.guid`` of the active
    operation handle. It is ``None`` whenever there is no active operation
    handle, e.g. before any operation was invoked via the execute method or
    after the cursor was closed.
    """
    handle = self.active_op_handle
    if handle is None:
        return None
    guid_bytes = handle.operationId.guid
    return str(UUID(bytes=guid_bytes))
1023
+
1010
1024
  @property
1011
1025
  def description(self) -> Optional[List[Tuple]]:
1012
1026
  """
@@ -376,8 +376,8 @@ class ThriftBackend:
376
376
  # encapsulate retry checks, returns None || delay-in-secs
377
377
  # Retry IFF 429/503 code + Retry-After header set
378
378
  http_code = getattr(self._transport, "code", None)
379
- retry_after = getattr(self._transport, "headers", {}).get("Retry-After")
380
- if http_code in [429, 503] and retry_after:
379
+ retry_after = getattr(self._transport, "headers", {}).get("Retry-After", 1)
380
+ if http_code in [429, 503]:
381
381
  # bound delay (seconds) by [min_delay*1.5^(attempt-1), max_delay]
382
382
  return bound_retry_delay(attempt, int(retry_after))
383
383
  return None
@@ -64,6 +64,7 @@ class DatabricksDialect(default.DefaultDialect):
64
64
  supports_default_values: bool = False
65
65
  supports_server_side_cursors: bool = False
66
66
  supports_sequences: bool = False
67
+ supports_native_boolean: bool = True
67
68
 
68
69
  colspecs = {
69
70
  sqlalchemy.types.DateTime: dialect_type_impl.TIMESTAMP_NTZ,
@@ -0,0 +1,44 @@
1
+ import os
2
+ import pytest
3
+
4
+
5
+ @pytest.fixture(scope="session")
6
+ def host():
7
+ return os.getenv("DATABRICKS_SERVER_HOSTNAME")
8
+
9
+
10
+ @pytest.fixture(scope="session")
11
+ def http_path():
12
+ return os.getenv("DATABRICKS_HTTP_PATH")
13
+
14
+
15
+ @pytest.fixture(scope="session")
16
+ def access_token():
17
+ return os.getenv("DATABRICKS_TOKEN")
18
+
19
+
20
+ @pytest.fixture(scope="session")
21
+ def ingestion_user():
22
+ return os.getenv("DATABRICKS_USER")
23
+
24
+
25
+ @pytest.fixture(scope="session")
26
+ def catalog():
27
+ return os.getenv("DATABRICKS_CATALOG")
28
+
29
+
30
+ @pytest.fixture(scope="session")
31
+ def schema():
32
+ return os.getenv("DATABRICKS_SCHEMA", "default")
33
+
34
+
35
+ @pytest.fixture(scope="session", autouse=True)
36
+ def connection_details(host, http_path, access_token, ingestion_user, catalog, schema):
37
+ return {
38
+ "host": host,
39
+ "http_path": http_path,
40
+ "access_token": access_token,
41
+ "ingestion_user": ingestion_user,
42
+ "catalog": catalog,
43
+ "schema": schema,
44
+ }
@@ -1,6 +1,5 @@
1
1
  import datetime
2
2
  import decimal
3
- import os
4
3
  from typing import Tuple, Union, List
5
4
  from unittest import skipIf
6
5
 
@@ -19,7 +18,7 @@ from sqlalchemy.engine import Engine
19
18
  from sqlalchemy.engine.reflection import Inspector
20
19
  from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column
21
20
  from sqlalchemy.schema import DropColumnComment, SetColumnComment
22
- from sqlalchemy.types import BOOLEAN, DECIMAL, Date, DateTime, Integer, String
21
+ from sqlalchemy.types import BOOLEAN, DECIMAL, Date, Integer, String
23
22
 
24
23
  try:
25
24
  from sqlalchemy.orm import declarative_base
@@ -49,12 +48,12 @@ def version_agnostic_select(object_to_select, *args, **kwargs):
49
48
  return select(object_to_select, *args, **kwargs)
50
49
 
51
50
 
52
- def version_agnostic_connect_arguments(catalog=None, schema=None) -> Tuple[str, dict]:
53
- HOST = os.environ.get("host")
54
- HTTP_PATH = os.environ.get("http_path")
55
- ACCESS_TOKEN = os.environ.get("access_token")
56
- CATALOG = catalog or os.environ.get("catalog")
57
- SCHEMA = schema or os.environ.get("schema")
51
+ def version_agnostic_connect_arguments(connection_details) -> Tuple[str, dict]:
52
+ HOST = connection_details["host"]
53
+ HTTP_PATH = connection_details["http_path"]
54
+ ACCESS_TOKEN = connection_details["access_token"]
55
+ CATALOG = connection_details["catalog"]
56
+ SCHEMA = connection_details["schema"]
58
57
 
59
58
  ua_connect_args = {"_user_agent_entry": USER_AGENT_TOKEN}
60
59
 
@@ -77,8 +76,8 @@ def version_agnostic_connect_arguments(catalog=None, schema=None) -> Tuple[str,
77
76
 
78
77
 
79
78
  @pytest.fixture
80
- def db_engine() -> Engine:
81
- conn_string, connect_args = version_agnostic_connect_arguments()
79
+ def db_engine(connection_details) -> Engine:
80
+ conn_string, connect_args = version_agnostic_connect_arguments(connection_details)
82
81
  return create_engine(conn_string, connect_args=connect_args)
83
82
 
84
83
 
@@ -92,10 +91,11 @@ def run_query(db_engine: Engine, query: Union[str, Text]):
92
91
 
93
92
 
94
93
  @pytest.fixture
95
- def samples_engine() -> Engine:
96
- conn_string, connect_args = version_agnostic_connect_arguments(
97
- catalog="samples", schema="nyctaxi"
98
- )
94
+ def samples_engine(connection_details) -> Engine:
95
+ details = connection_details.copy()
96
+ details["catalog"] = "samples"
97
+ details["schema"] = "nyctaxi"
98
+ conn_string, connect_args = version_agnostic_connect_arguments(details)
99
99
  return create_engine(conn_string, connect_args=connect_args)
100
100
 
101
101
 
@@ -141,7 +141,7 @@ def test_connect_args(db_engine):
141
141
  def test_pandas_upload(db_engine, metadata_obj):
142
142
  import pandas as pd
143
143
 
144
- SCHEMA = os.environ.get("schema")
144
+ SCHEMA = "default"
145
145
  try:
146
146
  df = pd.read_excel(
147
147
  "src/databricks/sqlalchemy/test_local/e2e/demo_data/MOCK_DATA.xlsx"
@@ -409,7 +409,9 @@ def test_get_table_names_smoke_test(samples_engine: Engine):
409
409
  _names is not None, "get_table_names did not succeed"
410
410
 
411
411
 
412
- def test_has_table_across_schemas(db_engine: Engine, samples_engine: Engine):
412
+ def test_has_table_across_schemas(
413
+ db_engine: Engine, samples_engine: Engine, catalog: str, schema: str
414
+ ):
413
415
  """For this test to pass these conditions must be met:
414
416
  - Table samples.nyctaxi.trips must exist
415
417
  - Table samples.tpch.customer must exist
@@ -426,9 +428,6 @@ def test_has_table_across_schemas(db_engine: Engine, samples_engine: Engine):
426
428
  )
427
429
 
428
430
  # 3) Check for a table within a different catalog
429
- other_catalog = os.environ.get("catalog")
430
- other_schema = os.environ.get("schema")
431
-
432
431
  # Create a table in a different catalog
433
432
  with db_engine.connect() as conn:
434
433
  conn.execute(text("CREATE TABLE test_has_table (numbers_are_cool INT);"))
@@ -442,8 +441,8 @@ def test_has_table_across_schemas(db_engine: Engine, samples_engine: Engine):
442
441
  assert samples_engine.dialect.has_table(
443
442
  connection=conn,
444
443
  table_name="test_has_table",
445
- schema=other_schema,
446
- catalog=other_catalog,
444
+ schema=schema,
445
+ catalog=catalog,
447
446
  )
448
447
  finally:
449
448
  conn.execute(text("DROP TABLE test_has_table;"))
@@ -503,12 +502,12 @@ def test_get_columns(db_engine, sample_table: str):
503
502
 
504
503
  class TestCommentReflection:
505
504
  @pytest.fixture(scope="class")
506
- def engine(self):
507
- HOST = os.environ.get("host")
508
- HTTP_PATH = os.environ.get("http_path")
509
- ACCESS_TOKEN = os.environ.get("access_token")
510
- CATALOG = os.environ.get("catalog")
511
- SCHEMA = os.environ.get("schema")
505
+ def engine(self, connection_details: dict):
506
+ HOST = connection_details["host"]
507
+ HTTP_PATH = connection_details["http_path"]
508
+ ACCESS_TOKEN = connection_details["access_token"]
509
+ CATALOG = connection_details["catalog"]
510
+ SCHEMA = connection_details["schema"]
512
511
 
513
512
  connection_string = f"databricks://token:{ACCESS_TOKEN}@{HOST}?http_path={HTTP_PATH}&catalog={CATALOG}&schema={SCHEMA}"
514
513
  connect_args = {"_user_agent_entry": USER_AGENT_TOKEN}
@@ -64,16 +64,16 @@ def test_extract_3l_namespace_from_bad_constraint_string():
64
64
  extract_three_level_identifier_from_constraint_string(input)
65
65
 
66
66
 
67
- @pytest.mark.parametrize("schema", [None, "some_schema"])
68
- def test_build_fk_dict(schema):
67
+ @pytest.mark.parametrize("tschema", [None, "some_schema"])
68
+ def test_build_fk_dict(tschema):
69
69
  fk_constraint_string = "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`some_schema`.`users` (`user_id`)"
70
70
 
71
- result = build_fk_dict("some_fk_name", fk_constraint_string, schema_name=schema)
71
+ result = build_fk_dict("some_fk_name", fk_constraint_string, schema_name=tschema)
72
72
 
73
73
  assert result == {
74
74
  "name": "some_fk_name",
75
75
  "constrained_columns": ["parent_user_id"],
76
- "referred_schema": schema,
76
+ "referred_schema": tschema,
77
77
  "referred_table": "users",
78
78
  "referred_columns": ["user_id"],
79
79
  }
@@ -1,3 +0,0 @@
1
- [pytest]
2
- markers =
3
- reviewed: Test case has been reviewed by databricks