databricks-sql-connector 2.9.2.dev1__tar.gz → 2.9.4b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/CHANGELOG.md +22 -5
  2. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/PKG-INFO +2 -1
  3. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/pyproject.toml +1 -1
  4. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/__init__.py +33 -1
  5. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/retry.py +1 -5
  6. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/cloudfetch/download_manager.py +8 -32
  7. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/cloudfetch/downloader.py +93 -27
  8. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/exc.py +4 -0
  9. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_backend.py +18 -5
  10. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/utils.py +8 -0
  11. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/LICENSE +0 -0
  12. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/README.md +0 -0
  13. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/__init__.py +0 -0
  14. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/__init__.py +0 -0
  15. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/auth.py +0 -0
  16. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/authenticators.py +0 -0
  17. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/endpoint.py +0 -0
  18. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/oauth.py +0 -0
  19. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  20. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/thrift_http_client.py +0 -0
  21. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/client.py +0 -0
  22. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/experimental/__init__.py +0 -0
  23. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  24. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  25. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  26. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  27. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  28. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  29. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/__init__.py +0 -0
  30. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/types.py +0 -0
  31. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/__init__.py +0 -0
  32. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/dialect/__init__.py +0 -0
  33. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/dialect/base.py +0 -0
  34. {databricks_sql_connector-2.9.2.dev1 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/dialect/compiler.py +0 -0
@@ -1,14 +1,31 @@
1
1
  # Release History
2
2
 
3
- ## 2.9.x (Unreleased)
3
+ ## 2.9.4 (Unreleased)
4
4
 
5
- - Other: Explicitly pin urllib3 to ^2.0.0
5
+ ## 2.9.4b1 (2024-02-16)
6
+
7
+ - Fix: Cloud fetch file download errors (#356)
8
+ - Fix: Redact the URL query parameters from the urllib3.connectionpool logs (#341)
9
+
10
+ ## 2.9.3 (2023-08-24)
11
+
12
+ - Fix: Connections failed when urllib3~=1.0.0 is installed (#206)
13
+
14
+ ## 2.9.2 (2023-08-17)
15
+
16
+ - Other: Add `examples/v3_retries_query_execute.py` (#199)
17
+ - Other: suppress log message when `_enable_v3_retries` is not `True` (#199)
18
+ - Other: make this connector backwards compatible with `urllib3>=1.0.0` (#197)
19
+
20
+ ## 2.9.1 (2023-08-11)
21
+
22
+ - Other: Explicitly pin urllib3 to ^2.0.0 (#191)
6
23
 
7
24
  ## 2.9.0 (2023-08-10)
8
25
 
9
- - Replace retry handling with DatabricksRetryPolicy. This is disabled by default. To enable, set `enable_v3_retries=True` when creating `databricks.sql.client`
10
- - Other: Fix typo in README quick start example
11
- - Other: Add autospec to Client mocks and tidy up `make_request`
26
+ - Replace retry handling with DatabricksRetryPolicy. This is disabled by default. To enable, set `enable_v3_retries=True` when creating `databricks.sql.client` (#182)
27
+ - Other: Fix typo in README quick start example (#186)
28
+ - Other: Add autospec to Client mocks and tidy up `make_request` (#188)
12
29
 
13
30
  ## 2.8.0 (2023-07-21)
14
31
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: databricks-sql-connector
3
- Version: 2.9.2.dev1
3
+ Version: 2.9.4b1
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  Author: Databricks
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.8
12
12
  Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
15
16
  Requires-Dist: alembic (>=1.0.11,<2.0.0)
16
17
  Requires-Dist: lz4 (>=4.0.2,<5.0.0)
17
18
  Requires-Dist: numpy (>=1.16.6) ; python_version >= "3.7" and python_version < "3.11"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "2.9.2dev1"
3
+ version = "2.9.4b1"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -7,6 +7,38 @@ apilevel = "2.0"
7
7
  threadsafety = 1 # Threads may share the module, but not connections.
8
8
  paramstyle = "pyformat" # Python extended format codes, e.g. ...WHERE name=%(name)s
9
9
 
10
+ import re
11
+
12
+
13
+ class RedactUrlQueryParamsFilter(logging.Filter):
14
+ pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
15
+ mask = r"\1\2=<REDACTED>"
16
+
17
+ def __init__(self):
18
+ super().__init__()
19
+
20
+ def redact(self, string):
21
+ return re.sub(self.pattern, self.mask, str(string))
22
+
23
+ def filter(self, record):
24
+ record.msg = self.redact(str(record.msg))
25
+ if isinstance(record.args, dict):
26
+ for k in record.args.keys():
27
+ record.args[k] = (
28
+ self.redact(record.args[k])
29
+ if isinstance(record.arg[k], str)
30
+ else record.args[k]
31
+ )
32
+ else:
33
+ record.args = tuple(
34
+ (self.redact(arg) if isinstance(arg, str) else arg)
35
+ for arg in record.args
36
+ )
37
+
38
+ return True
39
+
40
+
41
+ logging.getLogger("urllib3.connectionpool").addFilter(RedactUrlQueryParamsFilter())
10
42
 
11
43
  class DBAPITypeObject(object):
12
44
  def __init__(self, *values):
@@ -28,7 +60,7 @@ DATETIME = DBAPITypeObject("timestamp")
28
60
  DATE = DBAPITypeObject("date")
29
61
  ROWID = DBAPITypeObject()
30
62
 
31
- __version__ = "2.9.2dev1"
63
+ __version__ = "2.9.4b1"
32
64
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
33
65
 
34
66
  # These two functions are pyhive legacy
@@ -56,8 +56,7 @@ class DatabricksRetryPolicy(Retry):
56
56
  `backoff_factor`.
57
57
 
58
58
  :param delay_max:
59
- Float of seconds for the maximum delay between retries. This is an alias for urllib3's
60
- `backoff_max`
59
+ Float of seconds for the maximum delay between retries.
61
60
 
62
61
  :param stop_after_attempts_count:
63
62
  Integer maximum number of attempts that will be retried. This is an alias for urllib3's
@@ -122,7 +121,6 @@ class DatabricksRetryPolicy(Retry):
122
121
  total=_attempts_remaining,
123
122
  respect_retry_after_header=True,
124
123
  backoff_factor=self.delay_min,
125
- backoff_max=self.delay_max,
126
124
  allowed_methods=["POST"],
127
125
  status_forcelist=[429, 503, *self.force_dangerous_codes],
128
126
  )
@@ -212,13 +210,11 @@ class DatabricksRetryPolicy(Retry):
212
210
  allowed_methods=self.allowed_methods,
213
211
  status_forcelist=self.status_forcelist,
214
212
  backoff_factor=self.backoff_factor, # type: ignore
215
- backoff_max=self.backoff_max, # type: ignore
216
213
  raise_on_redirect=self.raise_on_redirect,
217
214
  raise_on_status=self.raise_on_status,
218
215
  history=self.history,
219
216
  remove_headers_on_redirect=self.remove_headers_on_redirect,
220
217
  respect_retry_after_header=self.respect_retry_after_header,
221
- backoff_jitter=self.backoff_jitter, # type: ignore
222
218
  )
223
219
 
224
220
  # Update urllib3's current state to reflect the incremented counters
@@ -8,6 +8,7 @@ from databricks.sql.cloudfetch.downloader import (
8
8
  ResultSetDownloadHandler,
9
9
  DownloadableResultSettings,
10
10
  )
11
+ from databricks.sql.exc import ResultSetDownloadError
11
12
  from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
12
13
 
13
14
  logger = logging.getLogger(__name__)
@@ -34,8 +35,6 @@ class ResultFileDownloadManager:
34
35
  self.download_handlers: List[ResultSetDownloadHandler] = []
35
36
  self.thread_pool = ThreadPoolExecutor(max_workers=max_download_threads + 1)
36
37
  self.downloadable_result_settings = DownloadableResultSettings(lz4_compressed)
37
- self.fetch_need_retry = False
38
- self.num_consecutive_result_file_download_retries = 0
39
38
 
40
39
  def add_file_links(
41
40
  self, t_spark_arrow_result_links: List[TSparkArrowResultLink]
@@ -81,13 +80,15 @@ class ResultFileDownloadManager:
81
80
 
82
81
  # Find next file
83
82
  idx = self._find_next_file_index(next_row_offset)
83
+ # is this correct?
84
84
  if idx is None:
85
85
  self._shutdown_manager()
86
+ logger.debug("could not find next file index")
86
87
  return None
87
88
  handler = self.download_handlers[idx]
88
89
 
89
90
  # Check (and wait) for download status
90
- if self._check_if_download_successful(handler):
91
+ if handler.is_file_download_successful():
91
92
  # Buffer should be empty so set buffer to new ArrowQueue with result_file
92
93
  result = DownloadedFile(
93
94
  handler.result_file,
@@ -97,9 +98,11 @@ class ResultFileDownloadManager:
97
98
  self.download_handlers.pop(idx)
98
99
  # Return True upon successful download to continue loop and not force a retry
99
100
  return result
100
- # Download was not successful for next download item, force a retry
101
+ # Download was not successful for next download item. Fail
101
102
  self._shutdown_manager()
102
- return None
103
+ raise ResultSetDownloadError(
104
+ f"Download failed for result set starting at {next_row_offset}"
105
+ )
103
106
 
104
107
  def _remove_past_handlers(self, next_row_offset: int):
105
108
  # Any link in which its start to end range doesn't include the next row to be fetched does not need downloading
@@ -133,33 +136,6 @@ class ResultFileDownloadManager:
133
136
  ]
134
137
  return next_indices[0] if len(next_indices) > 0 else None
135
138
 
136
- def _check_if_download_successful(self, handler: ResultSetDownloadHandler):
137
- # Check (and wait until download finishes) if download was successful
138
- if not handler.is_file_download_successful():
139
- if handler.is_link_expired:
140
- self.fetch_need_retry = True
141
- return False
142
- elif handler.is_download_timedout:
143
- # Consecutive file retries should not exceed threshold in settings
144
- if (
145
- self.num_consecutive_result_file_download_retries
146
- >= self.downloadable_result_settings.max_consecutive_file_download_retries
147
- ):
148
- self.fetch_need_retry = True
149
- return False
150
- self.num_consecutive_result_file_download_retries += 1
151
-
152
- # Re-submit handler run to thread pool and recursively check download status
153
- self.thread_pool.submit(handler.run)
154
- return self._check_if_download_successful(handler)
155
- else:
156
- self.fetch_need_retry = True
157
- return False
158
-
159
- self.num_consecutive_result_file_download_retries = 0
160
- self.fetch_need_retry = False
161
- return True
162
-
163
139
  def _shutdown_manager(self):
164
140
  # Clear download handlers and shutdown the thread pool
165
141
  self.download_handlers = []
@@ -1,15 +1,17 @@
1
1
  import logging
2
2
  from dataclasses import dataclass
3
-
4
3
  import requests
5
4
  import lz4.frame
6
5
  import threading
7
6
  import time
8
-
7
+ import os
8
+ import re
9
9
  from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
+ DEFAULT_CLOUD_FILE_TIMEOUT = int(os.getenv("DATABRICKS_CLOUD_FILE_TIMEOUT", 60))
14
+
13
15
 
14
16
  @dataclass
15
17
  class DownloadableResultSettings:
@@ -20,13 +22,17 @@ class DownloadableResultSettings:
20
22
  is_lz4_compressed (bool): Whether file is expected to be lz4 compressed.
21
23
  link_expiry_buffer_secs (int): Time in seconds to prevent download of a link before it expires. Default 0 secs.
22
24
  download_timeout (int): Timeout for download requests. Default 60 secs.
23
- max_consecutive_file_download_retries (int): Number of consecutive download retries before shutting down.
25
+ download_max_retries (int): Number of consecutive download retries before shutting down.
26
+ max_retries (int): Number of consecutive download retries before shutting down.
27
+ backoff_factor (int): Factor to increase wait time between retries.
28
+
24
29
  """
25
30
 
26
31
  is_lz4_compressed: bool
27
32
  link_expiry_buffer_secs: int = 0
28
- download_timeout: int = 60
29
- max_consecutive_file_download_retries: int = 0
33
+ download_timeout: int = DEFAULT_CLOUD_FILE_TIMEOUT
34
+ max_retries: int = 5
35
+ backoff_factor: int = 2
30
36
 
31
37
 
32
38
  class ResultSetDownloadHandler(threading.Thread):
@@ -57,16 +63,21 @@ class ResultSetDownloadHandler(threading.Thread):
57
63
  else None
58
64
  )
59
65
  try:
66
+ logger.debug(
67
+ f"waiting for at most {timeout} seconds for download file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
68
+ )
69
+
60
70
  if not self.is_download_finished.wait(timeout=timeout):
61
71
  self.is_download_timedout = True
62
- logger.debug(
63
- "Cloud fetch download timed out after {} seconds for link representing rows {} to {}".format(
64
- self.settings.download_timeout,
65
- self.result_link.startRowOffset,
66
- self.result_link.startRowOffset + self.result_link.rowCount,
67
- )
72
+ logger.error(
73
+ f"cloud fetch download timed out after {self.settings.download_timeout} seconds for link representing rows {self.result_link.startRowOffset} to {self.result_link.startRowOffset + self.result_link.rowCount}"
68
74
  )
69
- return False
75
+ # there are some weird cases when the is_download_finished is not set, but the file is downloaded successfully
76
+ return self.is_file_downloaded_successfully
77
+
78
+ logger.debug(
79
+ f"finish waiting for download file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
80
+ )
70
81
  except Exception as e:
71
82
  logger.error(e)
72
83
  return False
@@ -81,24 +92,36 @@ class ResultSetDownloadHandler(threading.Thread):
81
92
  """
82
93
  self._reset()
83
94
 
84
- # Check if link is already expired or is expiring
85
- if ResultSetDownloadHandler.check_link_expired(
86
- self.result_link, self.settings.link_expiry_buffer_secs
87
- ):
88
- self.is_link_expired = True
89
- return
95
+ try:
96
+ # Check if link is already expired or is expiring
97
+ if ResultSetDownloadHandler.check_link_expired(
98
+ self.result_link, self.settings.link_expiry_buffer_secs
99
+ ):
100
+ self.is_link_expired = True
101
+ return
90
102
 
91
- session = requests.Session()
92
- session.timeout = self.settings.download_timeout
103
+ logger.debug(
104
+ f"started to download file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
105
+ )
93
106
 
94
- try:
95
107
  # Get the file via HTTP request
96
- response = session.get(self.result_link.fileLink)
108
+ response = http_get_with_retry(
109
+ url=self.result_link.fileLink,
110
+ max_retries=self.settings.max_retries,
111
+ backoff_factor=self.settings.backoff_factor,
112
+ download_timeout=self.settings.download_timeout,
113
+ )
97
114
 
98
- if not response.ok:
99
- self.is_file_downloaded_successfully = False
115
+ if not response:
116
+ logger.error(
117
+ f"failed downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
118
+ )
100
119
  return
101
120
 
121
+ logger.debug(
122
+ f"success downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
123
+ )
124
+
102
125
  # Save (and decompress if needed) the downloaded file
103
126
  compressed_data = response.content
104
127
  decompressed_data = (
@@ -109,15 +132,22 @@ class ResultSetDownloadHandler(threading.Thread):
109
132
  self.result_file = decompressed_data
110
133
 
111
134
  # The size of the downloaded file should match the size specified from TSparkArrowResultLink
112
- self.is_file_downloaded_successfully = (
113
- len(self.result_file) == self.result_link.bytesNum
135
+ success = len(self.result_file) == self.result_link.bytesNum
136
+ logger.debug(
137
+ f"download successful file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
114
138
  )
139
+ self.is_file_downloaded_successfully = success
115
140
  except Exception as e:
141
+ logger.error(
142
+ f"exception downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
143
+ )
116
144
  logger.error(e)
117
145
  self.is_file_downloaded_successfully = False
118
146
 
119
147
  finally:
120
- session and session.close()
148
+ logger.debug(
149
+ f"signal finished file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
150
+ )
121
151
  # Awaken threads waiting for this to be true which signals the run is complete
122
152
  self.is_download_finished.set()
123
153
 
@@ -145,6 +175,7 @@ class ResultSetDownloadHandler(threading.Thread):
145
175
  link.expiryTime < current_time
146
176
  or link.expiryTime - current_time < expiry_buffer_secs
147
177
  ):
178
+ logger.debug("link expired")
148
179
  return True
149
180
  return False
150
181
 
@@ -171,3 +202,38 @@ class ResultSetDownloadHandler(threading.Thread):
171
202
  uncompressed_data += data
172
203
  start += num_bytes
173
204
  return uncompressed_data
205
+
206
+
207
+ def http_get_with_retry(url, max_retries=5, backoff_factor=2, download_timeout=60):
208
+ attempts = 0
209
+ pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
210
+ mask = r"\1\2=<REDACTED>"
211
+
212
+ # TODO: introduce connection pooling. I am seeing weird errors without it.
213
+ while attempts < max_retries:
214
+ try:
215
+ session = requests.Session()
216
+ session.timeout = download_timeout
217
+ response = session.get(url)
218
+
219
+ # Check if the response status code is in the 2xx range for success
220
+ if response.status_code == 200:
221
+ return response
222
+ else:
223
+ logger.error(response)
224
+ except requests.RequestException as e:
225
+ # if this is not redacted, it will print the pre-signed URL
226
+ logger.error(f"request failed with exception: {re.sub(pattern, mask, str(e))}")
227
+ finally:
228
+ session.close()
229
+ # Exponential backoff before the next attempt
230
+ wait_time = backoff_factor**attempts
231
+ logger.info(f"retrying in {wait_time} seconds...")
232
+ time.sleep(wait_time)
233
+
234
+ attempts += 1
235
+
236
+ logger.error(
237
+ f"exceeded maximum number of retries ({max_retries}) while downloading result."
238
+ )
239
+ return None
@@ -115,3 +115,7 @@ class SessionAlreadyClosedError(RequestError):
115
115
 
116
116
  class CursorAlreadyClosedError(RequestError):
117
117
  """Thrown if CancelOperation receives a code 404. ThriftBackend should gracefully proceed as this is expected."""
118
+
119
+
120
+ class ResultSetDownloadError(RequestError):
121
+ """Thrown if there was an error during the download of a result set"""
@@ -371,13 +371,16 @@ class ThriftBackend:
371
371
 
372
372
  this_method_name = getattr(method, "__name__")
373
373
 
374
- logger.debug("Sending request: {}(<REDACTED>)".format(this_method_name))
374
+ logger.debug(
375
+ "sending thrift request: {}(<REDACTED>)".format(this_method_name)
376
+ )
375
377
  unsafe_logger.debug("Sending request: {}".format(request))
376
378
 
377
379
  # These three lines are no-ops if the v3 retry policy is not in use
378
- this_command_type = CommandType.get(this_method_name)
379
- self._transport.set_retry_command_type(this_command_type)
380
- self._transport.startRetryTimer()
380
+ if self.enable_v3_retries:
381
+ this_command_type = CommandType.get(this_method_name)
382
+ self._transport.set_retry_command_type(this_command_type)
383
+ self._transport.startRetryTimer()
381
384
 
382
385
  response = method(request)
383
386
 
@@ -386,7 +389,9 @@ class ThriftBackend:
386
389
 
387
390
  # We need to call type(response) here because thrift doesn't implement __name__ attributes for thrift responses
388
391
  logger.debug(
389
- "Received response: {}(<REDACTED>)".format(type(response).__name__)
392
+ "received thrift response: {}(<REDACTED>)".format(
393
+ type(response).__name__
394
+ )
390
395
  )
391
396
  unsafe_logger.debug("Received response: {}".format(response))
392
397
  return response
@@ -740,6 +745,7 @@ class ThriftBackend:
740
745
  lz4_compressed = t_result_set_metadata_resp.lz4Compressed
741
746
  is_staging_operation = t_result_set_metadata_resp.isStagingOperation
742
747
  if direct_results and direct_results.resultSet:
748
+ logger.debug(f"received direct results")
743
749
  assert direct_results.resultSet.results.startRowOffset == 0
744
750
  assert direct_results.resultSetMetadata
745
751
 
@@ -752,6 +758,7 @@ class ThriftBackend:
752
758
  description=description,
753
759
  )
754
760
  else:
761
+ logger.debug(f"must fetch results")
755
762
  arrow_queue_opt = None
756
763
  return ExecuteResponse(
757
764
  arrow_queue=arrow_queue_opt,
@@ -815,6 +822,10 @@ class ThriftBackend:
815
822
  ):
816
823
  assert session_handle is not None
817
824
 
825
+ logger.debug(
826
+ f"executing: cloud fetch: {use_cloud_fetch}, max rows: {max_rows}, max bytes: {max_bytes}"
827
+ )
828
+
818
829
  spark_arrow_types = ttypes.TSparkArrowTypes(
819
830
  timestampAsArrow=self._use_arrow_native_timestamps,
820
831
  decimalAsArrow=self._use_arrow_native_decimals,
@@ -929,6 +940,7 @@ class ThriftBackend:
929
940
  return self._handle_execute_response(resp, cursor)
930
941
 
931
942
  def _handle_execute_response(self, resp, cursor):
943
+ logger.debug(f"got execute response")
932
944
  cursor.active_op_handle = resp.operationHandle
933
945
  self._check_direct_results_for_error(resp.directResults)
934
946
 
@@ -949,6 +961,7 @@ class ThriftBackend:
949
961
  arrow_schema_bytes,
950
962
  description,
951
963
  ):
964
+ logger.debug("started to fetch results")
952
965
  assert op_handle is not None
953
966
 
954
967
  req = ttypes.TFetchResultsReq(
@@ -5,6 +5,7 @@ from decimal import Decimal
5
5
  import datetime
6
6
  import decimal
7
7
  from enum import Enum
8
+ import logging
8
9
  import lz4.frame
9
10
  from typing import Dict, List, Union, Any
10
11
  import pyarrow
@@ -18,6 +19,7 @@ from databricks.sql.thrift_api.TCLIService.ttypes import (
18
19
  )
19
20
 
20
21
  BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128]
22
+ logger = logging.getLogger(__name__)
21
23
 
22
24
 
23
25
  class ResultSetQueue(ABC):
@@ -71,6 +73,9 @@ class ResultSetQueueFactory(ABC):
71
73
  )
72
74
  return ArrowQueue(converted_arrow_table, n_valid_rows)
73
75
  elif row_set_type == TSparkRowSetType.URL_BASED_SET:
76
+ logger.debug(
77
+ f"built cloud fetch queue for {len(t_row_set.resultLinks)} links."
78
+ )
74
79
  return CloudFetchQueue(
75
80
  arrow_schema_bytes,
76
81
  start_row_offset=t_row_set.startRowOffset,
@@ -146,6 +151,9 @@ class CloudFetchQueue(ResultSetQueue):
146
151
  self.lz4_compressed = lz4_compressed
147
152
  self.description = description
148
153
 
154
+ logger.debug(
155
+ f"creating cloud fetch queue for {len(result_links)} links and max_download_threads {self.max_download_threads}."
156
+ )
149
157
  self.download_manager = ResultFileDownloadManager(
150
158
  self.max_download_threads, self.lz4_compressed
151
159
  )