databricks-sql-connector 2.8.0__tar.gz → 2.9.1.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/CHANGELOG.md +12 -2
  2. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/PKG-INFO +3 -2
  3. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/README.md +1 -1
  4. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/pyproject.toml +13 -17
  5. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/__init__.py +1 -1
  6. databricks_sql_connector-2.9.1.dev1/src/databricks/sql/auth/retry.py +410 -0
  7. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/auth/thrift_http_client.py +24 -0
  8. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/client.py +11 -1
  9. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/exc.py +22 -0
  10. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/thrift_backend.py +51 -9
  11. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sqlalchemy/dialect/__init__.py +1 -1
  12. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/LICENSE +0 -0
  13. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/__init__.py +0 -0
  14. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/auth/__init__.py +0 -0
  15. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/auth/auth.py +0 -0
  16. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/auth/authenticators.py +0 -0
  17. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/auth/endpoint.py +0 -0
  18. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/auth/oauth.py +0 -0
  19. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  20. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
  21. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/cloudfetch/downloader.py +0 -0
  22. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/experimental/__init__.py +0 -0
  23. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  24. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  25. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  26. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  27. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  28. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  29. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/thrift_api/__init__.py +0 -0
  30. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/types.py +0 -0
  31. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sql/utils.py +0 -0
  32. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sqlalchemy/__init__.py +0 -0
  33. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sqlalchemy/dialect/base.py +0 -0
  34. {databricks_sql_connector-2.8.0 → databricks_sql_connector-2.9.1.dev1}/src/databricks/sqlalchemy/dialect/compiler.py +0 -0
@@ -1,10 +1,20 @@
1
1
  # Release History
2
2
 
3
- ## 2.8.x (Unreleased)
3
+ ## 2.9.x (Unreleased)
4
+
5
+ ## 2.9.1 (2023-08-11)
6
+
7
+ - Other: Explicitly pin urllib3 to ^2.0.0
8
+
9
+ ## 2.9.0 (2023-08-10)
10
+
11
+ - Replace retry handling with DatabricksRetryPolicy. This is disabled by default. To enable, set `_enable_v3_retries=True` when creating `databricks.sql.client`
12
+ - Other: Fix typo in README quick start example
13
+ - Other: Add autospec to Client mocks and tidy up `make_request`
4
14
 
5
15
  ## 2.8.0 (2023-07-21)
6
16
 
7
- - Add support for Cloud Fetch (#146, #151, #154)
17
+ - Add support for Cloud Fetch. Disabled by default. Set `use_cloud_fetch=True` when building `databricks.sql.client` to enable it (#146, #151, #154)
8
18
  - SQLAlchemy has_table function now honours schema= argument and adds catalog= argument (#174)
9
19
  - SQLAlchemy set non_native_boolean_check_constraint False as it's not supported by Databricks (#120)
10
20
  - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x (#173)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: databricks-sql-connector
3
- Version: 2.8.0
3
+ Version: 2.9.1.dev1
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  Author: Databricks
@@ -25,6 +25,7 @@ Requires-Dist: pyarrow (>=6.0.0) ; python_version >= "3.7" and python_version <
25
25
  Requires-Dist: requests (>=2.18.1,<3.0.0)
26
26
  Requires-Dist: sqlalchemy (>=1.3.24,<2.0.0)
27
27
  Requires-Dist: thrift (>=0.16.0,<0.17.0)
28
+ Requires-Dist: urllib3 (>=2.0.0,<3.0.0)
28
29
  Project-URL: Bug Tracker, https://github.com/databricks/databricks-sql-python/issues
29
30
  Project-URL: Homepage, https://github.com/databricks/databricks-sql-python
30
31
  Description-Content-Type: text/markdown
@@ -70,7 +71,7 @@ from databricks import sql
70
71
 
71
72
  host = os.getenv("DATABRICKS_HOST")
72
73
  http_path = os.getenv("DATABRICKS_HTTP_PATH")
73
- access_token = os.getenv("DATABRICKS_ACCESS_TOKEN")
74
+ access_token = os.getenv("DATABRICKS_TOKEN")
74
75
 
75
76
  connection = sql.connect(
76
77
  server_hostname=host,
@@ -39,7 +39,7 @@ from databricks import sql
39
39
 
40
40
  host = os.getenv("DATABRICKS_HOST")
41
41
  http_path = os.getenv("DATABRICKS_HTTP_PATH")
42
- access_token = os.getenv("DATABRICKS_ACCESS_TOKEN")
42
+ access_token = os.getenv("DATABRICKS_TOKEN")
43
43
 
44
44
  connection = sql.connect(
45
45
  server_hostname=host,
@@ -1,35 +1,35 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "2.8.0"
3
+ version = "2.9.1dev1"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
7
7
  readme = "README.md"
8
- packages = [{include = "databricks", from = "src"}]
8
+ packages = [{ include = "databricks", from = "src" }]
9
9
  include = ["CHANGELOG.md"]
10
10
 
11
11
  [tool.poetry.dependencies]
12
12
  python = "^3.7.1"
13
13
  thrift = "^0.16.0"
14
14
  pandas = [
15
- {version = ">=1.2.5,<1.4.0", python = ">=3.7,<3.8"},
16
- {version =">=1.2.5,<3.0.0", python = ">=3.8"}
15
+ { version = ">=1.2.5,<1.4.0", python = ">=3.7,<3.8" },
16
+ { version = ">=1.2.5,<3.0.0", python = ">=3.8" },
17
17
  ]
18
-
19
18
  pyarrow = [
20
- {version = ">=6.0.0", python = ">=3.7,<3.11"},
21
- {version = ">=10.0.1", python = ">=3.11"}
19
+ { version = ">=6.0.0", python = ">=3.7,<3.11" },
20
+ { version = ">=10.0.1", python = ">=3.11" },
22
21
  ]
23
22
  lz4 = "^4.0.2"
24
- requests="^2.18.1"
25
- oauthlib="^3.1.0"
23
+ requests = "^2.18.1"
24
+ oauthlib = "^3.1.0"
26
25
  numpy = [
27
- {version = ">=1.16.6", python = ">=3.7,<3.11"},
28
- {version = ">=1.23.4", python = ">=3.11"}
26
+ { version = ">=1.16.6", python = ">=3.7,<3.11" },
27
+ { version = ">=1.23.4", python = ">=3.11" },
29
28
  ]
30
29
  sqlalchemy = "^1.3.24"
31
30
  openpyxl = "^3.0.10"
32
31
  alembic = "^1.0.11"
32
+ urllib3 = "^2.0.0"
33
33
 
34
34
  [tool.poetry.dev-dependencies]
35
35
  pytest = "^7.1.2"
@@ -60,9 +60,5 @@ exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck
60
60
  minversion = "6.0"
61
61
  log_cli = "false"
62
62
  log_cli_level = "INFO"
63
- testpaths = [
64
- "tests"
65
- ]
66
- env_files = [
67
- "test.env"
68
- ]
63
+ testpaths = ["tests"]
64
+ env_files = ["test.env"]
@@ -28,7 +28,7 @@ DATETIME = DBAPITypeObject("timestamp")
28
28
  DATE = DBAPITypeObject("date")
29
29
  ROWID = DBAPITypeObject()
30
30
 
31
- __version__ = "2.8.0"
31
+ __version__ = "2.9.1dev1"
32
32
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
33
33
 
34
34
  # These two functions are pyhive legacy
@@ -0,0 +1,410 @@
1
+ import logging
2
+ import time
3
+ import typing
4
+ from enum import Enum
5
+ from typing import List, Optional, Tuple, Union
6
+
7
+ from urllib3 import BaseHTTPResponse # type: ignore
8
+ from urllib3 import Retry
9
+ from urllib3.util.retry import RequestHistory
10
+
11
+ from databricks.sql.exc import (
12
+ CursorAlreadyClosedError,
13
+ MaxRetryDurationError,
14
+ NonRecoverableNetworkError,
15
+ OperationalError,
16
+ SessionAlreadyClosedError,
17
+ UnsafeToRetryError,
18
+ )
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ class CommandType(Enum):
24
+ EXECUTE_STATEMENT = "ExecuteStatement"
25
+ CLOSE_SESSION = "CloseSession"
26
+ CLOSE_OPERATION = "CloseOperation"
27
+ OTHER = "Other"
28
+
29
+ @classmethod
30
+ def get(cls, value: str):
31
+ value_name_map = {i.value: i.name for i in cls}
32
+ valid_command = value_name_map.get(value, False)
33
+ if valid_command:
34
+ return getattr(cls, str(valid_command))
35
+ else:
36
+ return cls.OTHER
37
+
38
+
39
+ class DatabricksRetryPolicy(Retry):
40
+ """
41
+ Implements our v3 retry policy by extending urllib3's robust default retry behaviour.
42
+
43
+ Retry logic varies based on the overall wall-clock request time and Thrift CommandType
44
+ being issued. ThriftBackend starts a timer and sets the current CommandType prior to
45
+ initiating a network request. See `self.should_retry()` for details about what we do
46
+ and do not retry.
47
+
48
+ :param delay_min:
49
+ Float of seconds for the minimum delay between retries. This is an alias for urllib3's
50
+ `backoff_factor`.
51
+
52
+ :param delay_max:
53
+ Float of seconds for the maximum delay between retries. This is an alias for urllib3's
54
+ `backoff_max`
55
+
56
+ :param stop_after_attempts_count:
57
+ Integer maximum number of attempts that will be retried. This is an alias for urllib3's
58
+ `total`.
59
+
60
+ :param stop_after_attempts_duration:
61
+ Float of maximum number of seconds within which a request may be retried starting from
62
+ the beginning of the first request.
63
+
64
+ :param delay_default:
65
+ Float of seconds the connector will wait between successive GetOperationStatus
66
+ requests. This parameter is not used to retry failed network requests. We include
67
+ it in this class to keep all retry behaviour encapsulated in this file.
68
+
69
+ :param force_dangerous_codes:
70
+ List of integer HTTP status codes that the connector will retry, even for dangerous
71
+ commands like ExecuteStatement. This is passed to urllib3 by extending its status_forcelist
72
+
73
+ :param urllib3_kwargs:
74
+ Dictionary of arguments that are passed to Retry.__init__. Any setting of Retry() that
75
+ Databricks does not override or extend may be modified here.
76
+ """
77
+
78
+ def __init__(
79
+ self,
80
+ delay_min: float,
81
+ delay_max: float,
82
+ stop_after_attempts_count: int,
83
+ stop_after_attempts_duration: float,
84
+ delay_default: float,
85
+ force_dangerous_codes: List[int],
86
+ urllib3_kwargs: dict = {},
87
+ ):
88
+ # These values do not change from one command to the next
89
+ self.delay_max = delay_max
90
+ self.delay_min = delay_min
91
+ self.stop_after_attempts_count = stop_after_attempts_count
92
+ self.stop_after_attempts_duration = stop_after_attempts_duration
93
+ self._delay_default = delay_default
94
+ self.force_dangerous_codes = force_dangerous_codes
95
+
96
+ # the urllib3 kwargs are a mix of configuration (some of which we override)
97
+ # and counters like `total` or `connect` which may change between successive retries
98
+ # we only care about urllib3 kwargs that we alias, override, or add to in some way
99
+
100
+ # the length of _history increases as retries are performed
101
+ _history: Optional[Tuple[RequestHistory, ...]] = urllib3_kwargs.get("history")
102
+
103
+ if not _history:
104
+ # no attempts were made so we can retry the current command as many times as specified
105
+ # by the user
106
+ _attempts_remaining = self.stop_after_attempts_count
107
+ else:
108
+ # at least one of our attempts has been consumed, and urllib3 will have set a total
109
+ # `total` is a counter that begins equal to self.stop_after_attempts_count and is
110
+ # decremented after each unsuccessful request. When `total` is zero, urllib3 raises a
111
+ # MaxRetryError
112
+ _total: int = urllib3_kwargs.pop("total")
113
+ _attempts_remaining = _total
114
+
115
+ _urllib_kwargs_we_care_about = dict(
116
+ total=_attempts_remaining,
117
+ respect_retry_after_header=True,
118
+ backoff_factor=self.delay_min,
119
+ backoff_max=self.delay_max,
120
+ allowed_methods=["POST"],
121
+ status_forcelist=[429, 503, *self.force_dangerous_codes],
122
+ )
123
+
124
+ urllib3_kwargs.update(**_urllib_kwargs_we_care_about)
125
+
126
+ super().__init__(
127
+ **urllib3_kwargs, # type: ignore
128
+ )
129
+
130
+ @classmethod
131
+ def __private_init__(
132
+ cls, retry_start_time: float, command_type: Optional[CommandType], **init_kwargs
133
+ ):
134
+ """
135
+ Returns a new instance of DatabricksRetryPolicy with the _retry_start_time and _command_type
136
+ properties already set. This method should only be called by DatabricksRetryPolicy itself between
137
+ successive Retry attempts.
138
+
139
+ :param retry_start_time:
140
+ Float unix timestamp. Used to monitor the overall request duration across successive
141
+ retries. Never set this value directly. Use self.start_retry_timer() instead. Users
142
+ never set this value. It is set by ThriftBackend immediately before issuing a network
143
+ request.
144
+
145
+ :param command_type:
146
+ CommandType of the current request being retried. Used to modify retry behaviour based
147
+ on the type of Thrift command being issued. See self.should_retry() for details. Users
148
+ never set this value directly. It is set by ThriftBackend immediately before issuing
149
+ a network request.
150
+
151
+ :param init_kwargs:
152
+ A dictionary of parameters that will be passed to __init__ in the new object
153
+ """
154
+
155
+ new_object = cls(**init_kwargs)
156
+ new_object._retry_start_time = retry_start_time
157
+ new_object.command_type = command_type
158
+ return new_object
159
+
160
+ def new(self, **urllib3_incremented_counters: typing.Any) -> Retry:
161
+ """This method is responsible for passing the entire Retry state to its next iteration.
162
+
163
+ urllib3 calls Retry.new() between successive requests as part of its `.increment()` method
164
+ as shown below:
165
+
166
+ ```python
167
+ new_retry = self.new(
168
+ total=total,
169
+ connect=connect,
170
+ read=read,
171
+ redirect=redirect,
172
+ status=status_count,
173
+ other=other,
174
+ history=history,
175
+ )
176
+ ```
177
+
178
+ The arguments it passes to `.new()` (total, connect, read, etc.) are those modified by `.increment()`.
179
+
180
+ Since self.__init__ has a different signature than Retry.__init__ , we implement our own `self.new()`
181
+ to pipe our Databricks-specific state while preserving the super-class's behaviour.
182
+
183
+ """
184
+
185
+ # These arguments will match the function signature for self.__init__
186
+ databricks_init_params = dict(
187
+ delay_min=self.delay_min,
188
+ delay_max=self.delay_max,
189
+ stop_after_attempts_count=self.stop_after_attempts_count,
190
+ stop_after_attempts_duration=self.stop_after_attempts_duration,
191
+ delay_default=self.delay_default,
192
+ force_dangerous_codes=self.force_dangerous_codes,
193
+ urllib3_kwargs={},
194
+ )
195
+
196
+ # Gather urllib3's current retry state _before_ increment was called
197
+ # These arguments match the function signature for Retry.__init__
198
+ # Note: if we update urllib3 we may need to add/remove arguments from this dict
199
+ urllib3_init_params = dict(
200
+ total=self.total,
201
+ connect=self.connect,
202
+ read=self.read,
203
+ redirect=self.redirect,
204
+ status=self.status,
205
+ other=self.other,
206
+ allowed_methods=self.allowed_methods,
207
+ status_forcelist=self.status_forcelist,
208
+ backoff_factor=self.backoff_factor, # type: ignore
209
+ backoff_max=self.backoff_max, # type: ignore
210
+ raise_on_redirect=self.raise_on_redirect,
211
+ raise_on_status=self.raise_on_status,
212
+ history=self.history,
213
+ remove_headers_on_redirect=self.remove_headers_on_redirect,
214
+ respect_retry_after_header=self.respect_retry_after_header,
215
+ backoff_jitter=self.backoff_jitter, # type: ignore
216
+ )
217
+
218
+ # Update urllib3's current state to reflect the incremented counters
219
+ urllib3_init_params.update(**urllib3_incremented_counters)
220
+
221
+ # Include urllib3's current state in our __init__ params
222
+ databricks_init_params["urllib3_kwargs"].update(**urllib3_init_params) # type: ignore
223
+
224
+ return type(self).__private_init__(
225
+ retry_start_time=self._retry_start_time,
226
+ command_type=self.command_type,
227
+ **databricks_init_params,
228
+ )
229
+
230
+ @property
231
+ def command_type(self) -> Optional[CommandType]:
232
+ return self._command_type
233
+
234
+ @command_type.setter
235
+ def command_type(self, value: CommandType) -> None:
236
+ self._command_type = value
237
+
238
+ @property
239
+ def delay_default(self) -> float:
240
+ """Time in seconds the connector will wait between requests polling a GetOperationStatus Request
241
+
242
+ This property is never read by urllib3 for the purpose of retries. It's stored in this class
243
+ to keep all retry logic in one place.
244
+
245
+ This property is only set by __init__ and cannot be modified afterward.
246
+ """
247
+ return self._delay_default
248
+
249
+ def start_retry_timer(self) -> None:
250
+ """Timer is used to monitor the overall time across successive requests
251
+
252
+ Should only be called by ThriftBackend before sending a Thrift command"""
253
+ self._retry_start_time = time.time()
254
+
255
+ def check_timer_duration(self) -> float:
256
+ """Return time in seconds since the timer was started"""
257
+
258
+ if self._retry_start_time is None:
259
+ raise OperationalError(
260
+ "Cannot check retry timer. Timer was not started for this request."
261
+ )
262
+ else:
263
+ return time.time() - self._retry_start_time
264
+
265
+ def check_proposed_wait(self, proposed_wait: Union[int, float]) -> None:
266
+ """Raise an exception if the proposed wait would exceed the configured stop_after_attempts_duration"""
267
+
268
+ proposed_overall_time = self.check_timer_duration() + proposed_wait
269
+ if proposed_overall_time > self.stop_after_attempts_duration:
270
+ raise MaxRetryDurationError(
271
+ f"Retry request would exceed Retry policy max retry duration of {self.stop_after_attempts_duration} seconds"
272
+ )
273
+
274
+ def sleep_for_retry(self, response: BaseHTTPResponse) -> bool: # type: ignore
275
+ """Sleeps for the duration specified in the response Retry-After header, if present
276
+
277
+ A MaxRetryDurationError will be raised if doing so would exceed self.stop_after_attempts_duration
278
+
279
+ This method is only called by urllib3 internals.
280
+ """
281
+ retry_after = self.get_retry_after(response)
282
+ if retry_after:
283
+ self.check_proposed_wait(retry_after)
284
+ time.sleep(retry_after)
285
+ return True
286
+
287
+ return False
288
+
289
+ def get_backoff_time(self) -> float:
290
+ """Calls urllib3's built-in get_backoff_time.
291
+
292
+ Never returns a value larger than self.delay_max
293
+ A MaxRetryDurationError will be raised if the calculated backoff would exceed self.stop_after_attempts_duration
294
+
295
+ Note: within urllib3, a backoff is only calculated in cases where a Retry-After header is not present
296
+ in the previous unsuccessful request and `self.respect_retry_after_header` is True (which is always true)
297
+ """
298
+
299
+ proposed_backoff = super().get_backoff_time()
300
+ proposed_backoff = min(proposed_backoff, self.delay_max)
301
+ self.check_proposed_wait(proposed_backoff)
302
+
303
+ return proposed_backoff
304
+
305
+ def should_retry(self, method: str, status_code: int) -> Tuple[bool, str]:
306
+ """This method encapsulates the connector's approach to retries.
307
+
308
+ We always retry a request unless one of these conditions is met:
309
+
310
+ 1. The request received a 200 (Success) status code
311
+ Because the request succeeded.
312
+ 2. The request received a 501 (Not Implemented) status code
313
+ Because this request can never succeed.
314
+ 3. The request received a 404 (Not Found) code and the request CommandType
315
+ was CloseSession or CloseOperation. This code indicates that the session
316
+ or cursor was already closed. Further retries will always return the same
317
+ code.
318
+ 4. The request CommandType was ExecuteStatement and the HTTP code does not
319
+ appear in the default status_forcelist or force_dangerous_codes list. By
320
+ default, this means ExecuteStatement is only retried for codes 429 and 503.
321
+ This limit prevents automatically retrying non-idempotent commands that could
322
+ be destructive.
323
+
324
+
325
+ Q: What about OSErrors and Redirects?
326
+ A: urllib3 automatically retries in both scenarios
327
+
328
+ Returns a (should_retry, reason) tuple. should_retry is True if the request should be retried; it is False, or an exception is raised,
329
+ if a retry would violate the configured policy.
330
+ """
331
+
332
+ # Request succeeded. Don't retry.
333
+ if status_code == 200:
334
+ return False, "200 codes are not retried"
335
+
336
+ # Request failed and server said NotImplemented. This isn't recoverable. Don't retry.
337
+ if status_code == 501:
338
+ raise NonRecoverableNetworkError("Received code 501 from server.")
339
+
340
+ # Request failed and this method is not retryable. We only retry POST requests.
341
+ if not self._is_method_retryable(method): # type: ignore
342
+ return False, "Only POST requests are retried"
343
+
344
+ # Request failed with 404 because CloseSession returns 404 if you repeat the request.
345
+ if (
346
+ status_code == 404
347
+ and self.command_type == CommandType.CLOSE_SESSION
348
+ and len(self.history) > 0
349
+ ):
350
+ raise SessionAlreadyClosedError(
351
+ "CloseSession received 404 code from Databricks. Session is already closed."
352
+ )
353
+
354
+ # Request failed with 404 because CloseOperation returns 404 if you repeat the request.
355
+ if (
356
+ status_code == 404
357
+ and self.command_type == CommandType.CLOSE_OPERATION
358
+ and len(self.history) > 0
359
+ ):
360
+ raise CursorAlreadyClosedError(
361
+ "CloseOperation received 404 code from Databricks. Cursor is already closed."
362
+ )
363
+
364
+ # Request failed, was an ExecuteStatement and the command may have reached the server
365
+ if (
366
+ self.command_type == CommandType.EXECUTE_STATEMENT
367
+ and status_code not in self.status_forcelist
368
+ and status_code not in self.force_dangerous_codes
369
+ ):
370
+ raise UnsafeToRetryError(
371
+ "ExecuteStatement command can only be retried for codes 429 and 503"
372
+ )
373
+
374
+ # Request failed with a dangerous code, was an ExecuteStatement, but user forced retries for this
375
+ # dangerous code. Note that these lines _are not required_ to make these requests retry. They would
376
+ # retry automatically. This code is included only so that we can log the exact reason for the retry.
377
+ # This gives users signal that their _retry_dangerous_codes setting actually did something.
378
+ if (
379
+ self.command_type == CommandType.EXECUTE_STATEMENT
380
+ and status_code in self.force_dangerous_codes
381
+ ):
382
+ return (
383
+ True,
384
+ f"Request failed with dangerous code {status_code} that is one of the configured _retry_dangerous_codes.",
385
+ )
386
+
387
+ # None of the above conditions applied. Eagerly retry.
388
+ logger.debug(
389
+ f"This request should be retried: {self.command_type and self.command_type.value}"
390
+ )
391
+ return (
392
+ True,
393
+ "Failed requests are retried by default per configured DatabricksRetryPolicy",
394
+ )
395
+
396
+ def is_retry(
397
+ self, method: str, status_code: int, has_retry_after: bool = False
398
+ ) -> bool:
399
+ """
400
+ Called by urllib3 when determining whether or not to retry
401
+
402
+ Logs a debug message if the request will be retried
403
+ """
404
+
405
+ should_retry, msg = self.should_retry(method, status_code)
406
+
407
+ if should_retry:
408
+ logger.debug(msg)
409
+
410
+ return should_retry
@@ -15,6 +15,8 @@ from io import BytesIO
15
15
 
16
16
  from urllib3 import HTTPConnectionPool, HTTPSConnectionPool, ProxyManager
17
17
 
18
+ from databricks.sql.auth.retry import CommandType, DatabricksRetryPolicy
19
+
18
20
 
19
21
  class THttpClient(thrift.transport.THttpClient.THttpClient):
20
22
  def __init__(
@@ -28,6 +30,7 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
28
30
  key_file=None,
29
31
  ssl_context=None,
30
32
  max_connections: int = 1,
33
+ retry_policy: Union[DatabricksRetryPolicy, int] = 0,
31
34
  ):
32
35
  if port is not None:
33
36
  warnings.warn(
@@ -81,6 +84,10 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
81
84
 
82
85
  self.max_connections = max_connections
83
86
 
87
+ # If retry_policy == 0 then urllib3 will not retry automatically
88
+ # this falls back to the pre-v3 behaviour where thrift_backend.py handles retry logic
89
+ self.retry_policy = retry_policy
90
+
84
91
  self.__wbuf = BytesIO()
85
92
  self.__resp: Union[None, HTTPResponse] = None
86
93
  self.__timeout = None
@@ -92,6 +99,13 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
92
99
  self._headers = headers
93
100
  super().setCustomHeaders(headers)
94
101
 
102
+ def startRetryTimer(self):
103
+ """Notify DatabricksRetryPolicy of the request start time
104
+
105
+ This is used to enforce the retry_stop_after_attempts_duration
106
+ """
107
+ self.retry_policy and self.retry_policy.start_retry_timer()
108
+
95
109
  def open(self):
96
110
 
97
111
  # self.__pool replaces the self.__http used by the original THttpClient
@@ -167,6 +181,7 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
167
181
  headers=headers,
168
182
  preload_content=False,
169
183
  timeout=self.__timeout,
184
+ retries=self.retry_policy,
170
185
  )
171
186
 
172
187
  # Get reply to flush the request
@@ -188,3 +203,12 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
188
203
  )
189
204
  cr = base64.b64encode(ap.encode()).strip()
190
205
  return "Basic " + six.ensure_str(cr)
206
+
207
+ def set_retry_command_type(self, value: CommandType):
208
+ """Pass the provided CommandType to the retry policy"""
209
+ if isinstance(self.retry_policy, DatabricksRetryPolicy):
210
+ self.retry_policy.command_type = value
211
+ else:
212
+ logger.warning(
213
+ "DatabricksRetryPolicy is currently bypassed. The CommandType cannot be set."
214
+ )
@@ -8,7 +8,11 @@ import os
8
8
 
9
9
  from databricks.sql import __version__
10
10
  from databricks.sql import *
11
- from databricks.sql.exc import OperationalError
11
+ from databricks.sql.exc import (
12
+ OperationalError,
13
+ SessionAlreadyClosedError,
14
+ CursorAlreadyClosedError,
15
+ )
12
16
  from databricks.sql.thrift_backend import ThriftBackend
13
17
  from databricks.sql.utils import ExecuteResponse, ParamEscaper, inject_parameters
14
18
  from databricks.sql.types import Row
@@ -257,6 +261,9 @@ class Connection:
257
261
 
258
262
  try:
259
263
  self.thrift_backend.close_session(self._session_handle)
264
+ except RequestError as e:
265
+ if isinstance(e.args[1], SessionAlreadyClosedError):
266
+ logger.info("Session was closed by a prior request")
260
267
  except DatabaseError as e:
261
268
  if "Invalid SessionHandle" in str(e):
262
269
  logger.warning(
@@ -958,6 +965,9 @@ class ResultSet:
958
965
  and self.connection.open
959
966
  ):
960
967
  self.thrift_backend.close_command(self.command_id)
968
+ except RequestError as e:
969
+ if isinstance(e.args[1], CursorAlreadyClosedError):
970
+ logger.info("Operation was canceled by a prior request")
961
971
  finally:
962
972
  self.has_been_closed_server_side = True
963
973
  self.op_state = self.thrift_backend.CLOSED_OP_STATE
@@ -93,3 +93,25 @@ class RequestError(OperationalError):
93
93
  """
94
94
 
95
95
  pass
96
+
97
+
98
+ class MaxRetryDurationError(RequestError):
99
+ """Thrown if the next HTTP request retry would exceed the configured
100
+ stop_after_attempts_duration
101
+ """
102
+
103
+
104
+ class NonRecoverableNetworkError(RequestError):
105
+ """Thrown if an HTTP code 501 is received"""
106
+
107
+
108
+ class UnsafeToRetryError(RequestError):
109
+ """Thrown if ExecuteStatement request receives a code other than 200, 429, or 503"""
110
+
111
+
112
+ class SessionAlreadyClosedError(RequestError):
113
+ """Thrown if CloseSession receives a code 404. ThriftBackend should gracefully proceed as this is expected."""
114
+
115
+
116
+ class CursorAlreadyClosedError(RequestError):
117
+ """Thrown if CloseOperation receives a code 404. ThriftBackend should gracefully proceed as this is expected."""
@@ -17,9 +17,11 @@ import thrift.transport.TTransport
17
17
  import urllib3.exceptions
18
18
 
19
19
  import databricks.sql.auth.thrift_http_client
20
+ from databricks.sql.auth.thrift_http_client import CommandType
20
21
  from databricks.sql.auth.authenticators import AuthProvider
21
22
  from databricks.sql.thrift_api.TCLIService import TCLIService, ttypes
22
23
  from databricks.sql import *
24
+ from databricks.sql.exc import MaxRetryDurationError
23
25
  from databricks.sql.thrift_api.TCLIService.TCLIService import (
24
26
  Client as TCLIServiceClient,
25
27
  )
@@ -70,6 +72,12 @@ class ThriftBackend:
70
72
  CLOSED_OP_STATE = ttypes.TOperationState.CLOSED_STATE
71
73
  ERROR_OP_STATE = ttypes.TOperationState.ERROR_STATE
72
74
 
75
+ _retry_delay_min: float
76
+ _retry_delay_max: float
77
+ _retry_stop_after_attempts_count: int
78
+ _retry_stop_after_attempts_duration: float
79
+ _retry_delay_default: float
80
+
73
81
  def __init__(
74
82
  self,
75
83
  server_hostname: str,
@@ -113,9 +121,15 @@ class ThriftBackend:
113
121
  #
114
122
  # _retry_stop_after_attempts_count
115
123
  # The maximum number of times we should retry retryable requests (defaults to 24)
124
+ # _retry_dangerous_codes
125
+ # An iterable of integer HTTP status codes. ExecuteStatement commands will be retried if these codes are received.
126
+ # (defaults to [])
116
127
  # _socket_timeout
117
128
  # The timeout in seconds for socket send, recv and connect operations. Should be a positive float or integer.
118
129
  # (defaults to 900)
130
+ # _enable_v3_retries
131
+ # Whether to use DatabricksRetryPolicy, which is built on urllib3's Retry class
132
+ # (defaults to False)
119
133
  # max_download_threads
120
134
  # Number of threads for handling cloud fetch downloads. Defaults to 10
121
135
 
@@ -166,10 +180,28 @@ class ThriftBackend:
166
180
 
167
181
  self._auth_provider = auth_provider
168
182
 
183
+ # Connector version 3 retry approach
184
+ self.enable_v3_retries = kwargs.get("_enable_v3_retries", False)
185
+ self.force_dangerous_codes = kwargs.get("_retry_dangerous_codes", [])
186
+
187
+ additional_transport_args = {}
188
+ if self.enable_v3_retries:
189
+ self.retry_policy = databricks.sql.auth.thrift_http_client.DatabricksRetryPolicy(
190
+ delay_min=self._retry_delay_min,
191
+ delay_max=self._retry_delay_max,
192
+ stop_after_attempts_count=self._retry_stop_after_attempts_count,
193
+ stop_after_attempts_duration=self._retry_stop_after_attempts_duration,
194
+ delay_default=self._retry_delay_default,
195
+ force_dangerous_codes=self.force_dangerous_codes,
196
+ )
197
+
198
+ additional_transport_args["retry_policy"] = self.retry_policy
199
+
169
200
  self._transport = databricks.sql.auth.thrift_http_client.THttpClient(
170
201
  auth_provider=self._auth_provider,
171
202
  uri_or_host=uri,
172
203
  ssl_context=ssl_context,
204
+ **additional_transport_args, # type: ignore
173
205
  )
174
206
 
175
207
  timeout = kwargs.get("_socket_timeout", DEFAULT_SOCKET_TIMEOUT)
@@ -188,6 +220,7 @@ class ThriftBackend:
188
220
 
189
221
  self._request_lock = threading.RLock()
190
222
 
223
+ # TODO: Move this bounding logic into DatabricksRetryPolicy for v3 (PECO-918)
191
224
  def _initialize_retry_args(self, kwargs):
192
225
  # Configure retries & timing: use user-settings or defaults, and bound
193
226
  # by policy. Log.warn when given param gets restricted.
@@ -335,15 +368,17 @@ class ThriftBackend:
335
368
 
336
369
  error, error_message, retry_delay = None, None, None
337
370
  try:
338
- # The MagicMocks in our unit tests have a `name` property instead of `__name__`.
339
- logger.debug(
340
- "Sending request: {}(<REDACTED>)".format(
341
- getattr(
342
- method, "__name__", getattr(method, "name", "UnknownMethod")
343
- )
344
- )
345
- )
371
+
372
+ this_method_name = getattr(method, "__name__")
373
+
374
+ logger.debug("Sending request: {}(<REDACTED>)".format(this_method_name))
346
375
  unsafe_logger.debug("Sending request: {}".format(request))
376
+
377
+ # These three lines do not affect retry behaviour if the v3 retry policy is not in use
378
+ this_command_type = CommandType.get(this_method_name)
379
+ self._transport.set_retry_command_type(this_command_type)
380
+ self._transport.startRetryTimer()
381
+
347
382
  response = method(request)
348
383
 
349
384
  # Calling `close()` here releases the active HTTP connection back to the pool
@@ -359,9 +394,16 @@ class ThriftBackend:
359
394
  except urllib3.exceptions.HTTPError as err:
360
395
  # retry on timeout. Happens a lot in Azure and it is safe as data has not been sent to server yet
361
396
 
397
+ # TODO: don't use exception handling for GOS polling...
398
+
362
399
  gos_name = TCLIServiceClient.GetOperationStatus.__name__
363
400
  if method.__name__ == gos_name:
364
- retry_delay = bound_retry_delay(attempt, self._retry_delay_default)
401
+ delay_default = (
402
+ self.enable_v3_retries
403
+ and self.retry_policy.delay_default
404
+ or self._retry_delay_default
405
+ )
406
+ retry_delay = bound_retry_delay(attempt, delay_default)
365
407
  logger.info(
366
408
  f"GetOperationStatus failed with HTTP error and will be retried: {str(err)}"
367
409
  )
@@ -74,7 +74,7 @@ class DatabricksDialect(default.DefaultDialect):
74
74
  driver: str = "databricks-sql-python"
75
75
  default_schema_name: str = "default"
76
76
 
77
- preparer = DatabricksIdentifierPreparer
77
+ preparer = DatabricksIdentifierPreparer # type: ignore
78
78
  type_compiler = DatabricksTypeCompiler
79
79
  ddl_compiler = DatabricksDDLCompiler
80
80
  supports_statement_cache: bool = True