databricks-sql-connector 2.9.3__tar.gz → 2.9.4b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/CHANGELOG.md +5 -0
  2. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/PKG-INFO +2 -1
  3. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/pyproject.toml +1 -1
  4. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/__init__.py +33 -1
  5. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/cloudfetch/download_manager.py +8 -32
  6. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/cloudfetch/downloader.py +93 -27
  7. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/exc.py +4 -0
  8. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_backend.py +14 -2
  9. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/utils.py +8 -0
  10. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/LICENSE +0 -0
  11. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/README.md +0 -0
  12. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/__init__.py +0 -0
  13. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/__init__.py +0 -0
  14. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/auth.py +0 -0
  15. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/authenticators.py +0 -0
  16. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/endpoint.py +0 -0
  17. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/oauth.py +0 -0
  18. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  19. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/retry.py +0 -0
  20. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/thrift_http_client.py +0 -0
  21. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/client.py +0 -0
  22. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/experimental/__init__.py +0 -0
  23. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  24. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  25. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  26. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  27. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  28. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  29. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/__init__.py +0 -0
  30. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/types.py +0 -0
  31. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/__init__.py +0 -0
  32. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/dialect/__init__.py +0 -0
  33. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/dialect/base.py +0 -0
  34. {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/dialect/compiler.py +0 -0
@@ -2,6 +2,11 @@
2
2
 
3
3
  ## 2.9.4 (Unreleased)
4
4
 
5
+ ## 2.9.4b1 (2024-02-16)
6
+
7
+ - Fix: Cloud fetch file download errors (#356)
8
+ - Fix: Redact the URL query parameters from the urllib3.connectionpool logs (#341)
9
+
5
10
  ## 2.9.3 (2023-08-24)
6
11
 
7
12
  - Fix: Connections failed when urllib3~=1.0.0 is installed (#206)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: databricks-sql-connector
3
- Version: 2.9.3
3
+ Version: 2.9.4b1
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  Author: Databricks
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.8
12
12
  Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
15
16
  Requires-Dist: alembic (>=1.0.11,<2.0.0)
16
17
  Requires-Dist: lz4 (>=4.0.2,<5.0.0)
17
18
  Requires-Dist: numpy (>=1.16.6) ; python_version >= "3.7" and python_version < "3.11"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "2.9.3"
3
+ version = "2.9.4b1"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -7,6 +7,38 @@ apilevel = "2.0"
7
7
  threadsafety = 1 # Threads may share the module, but not connections.
8
8
  paramstyle = "pyformat" # Python extended format codes, e.g. ...WHERE name=%(name)s
9
9
 
10
+ import re
11
+
12
+
13
class RedactUrlQueryParamsFilter(logging.Filter):
    """Logging filter that masks URL query-parameter values in log records.

    Every ``?name=value`` or ``&name=value`` pair found in a record's message
    or in its string arguments is rewritten to ``name=<REDACTED>``. The filter
    never drops a record; it only rewrites its contents.
    """

    # Captures the separator ("?" or "&"), the parameter name, and its value.
    pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
    # Keeps the separator and the name, replaces the value.
    mask = r"\1\2=<REDACTED>"

    def redact(self, string):
        """Return ``str(string)`` with all query-parameter values masked."""
        return self.pattern.sub(self.mask, str(string))

    def _redact_arg(self, arg):
        """Mask ``arg`` when it is a string; return any other value untouched."""
        return self.redact(arg) if isinstance(arg, str) else arg

    def filter(self, record):
        """Redact the record's message and arguments in place.

        Args:
            record: The ``logging.LogRecord`` about to be emitted.

        Returns:
            Always ``True``, so the (redacted) record is still logged.
        """
        record.msg = self.redact(record.msg)
        if isinstance(record.args, dict):
            # Build a new dict so the mapping passed by the caller of the
            # logging call is not modified behind its back.
            record.args = {
                key: self._redact_arg(value) for key, value in record.args.items()
            }
        elif record.args:
            # Positional %-style arguments; empty or missing args are left as is.
            record.args = tuple(self._redact_arg(arg) for arg in record.args)

        return True
39
+
40
+
41
+ logging.getLogger("urllib3.connectionpool").addFilter(RedactUrlQueryParamsFilter())
10
42
 
11
43
  class DBAPITypeObject(object):
12
44
  def __init__(self, *values):
@@ -28,7 +60,7 @@ DATETIME = DBAPITypeObject("timestamp")
28
60
  DATE = DBAPITypeObject("date")
29
61
  ROWID = DBAPITypeObject()
30
62
 
31
- __version__ = "2.9.3"
63
+ __version__ = "2.9.4b1"
32
64
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
33
65
 
34
66
  # These two functions are pyhive legacy
@@ -8,6 +8,7 @@ from databricks.sql.cloudfetch.downloader import (
8
8
  ResultSetDownloadHandler,
9
9
  DownloadableResultSettings,
10
10
  )
11
+ from databricks.sql.exc import ResultSetDownloadError
11
12
  from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
12
13
 
13
14
  logger = logging.getLogger(__name__)
@@ -34,8 +35,6 @@ class ResultFileDownloadManager:
34
35
  self.download_handlers: List[ResultSetDownloadHandler] = []
35
36
  self.thread_pool = ThreadPoolExecutor(max_workers=max_download_threads + 1)
36
37
  self.downloadable_result_settings = DownloadableResultSettings(lz4_compressed)
37
- self.fetch_need_retry = False
38
- self.num_consecutive_result_file_download_retries = 0
39
38
 
40
39
  def add_file_links(
41
40
  self, t_spark_arrow_result_links: List[TSparkArrowResultLink]
@@ -81,13 +80,15 @@ class ResultFileDownloadManager:
81
80
 
82
81
  # Find next file
83
82
  idx = self._find_next_file_index(next_row_offset)
83
+ # is this correct?
84
84
  if idx is None:
85
85
  self._shutdown_manager()
86
+ logger.debug("could not find next file index")
86
87
  return None
87
88
  handler = self.download_handlers[idx]
88
89
 
89
90
  # Check (and wait) for download status
90
- if self._check_if_download_successful(handler):
91
+ if handler.is_file_download_successful():
91
92
  # Buffer should be empty so set buffer to new ArrowQueue with result_file
92
93
  result = DownloadedFile(
93
94
  handler.result_file,
@@ -97,9 +98,11 @@ class ResultFileDownloadManager:
97
98
  self.download_handlers.pop(idx)
98
99
  # Return True upon successful download to continue loop and not force a retry
99
100
  return result
100
- # Download was not successful for next download item, force a retry
101
+ # Download was not successful for next download item. Fail
101
102
  self._shutdown_manager()
102
- return None
103
+ raise ResultSetDownloadError(
104
+ f"Download failed for result set starting at {next_row_offset}"
105
+ )
103
106
 
104
107
  def _remove_past_handlers(self, next_row_offset: int):
105
108
  # Any link in which its start to end range doesn't include the next row to be fetched does not need downloading
@@ -133,33 +136,6 @@ class ResultFileDownloadManager:
133
136
  ]
134
137
  return next_indices[0] if len(next_indices) > 0 else None
135
138
 
136
- def _check_if_download_successful(self, handler: ResultSetDownloadHandler):
137
- # Check (and wait until download finishes) if download was successful
138
- if not handler.is_file_download_successful():
139
- if handler.is_link_expired:
140
- self.fetch_need_retry = True
141
- return False
142
- elif handler.is_download_timedout:
143
- # Consecutive file retries should not exceed threshold in settings
144
- if (
145
- self.num_consecutive_result_file_download_retries
146
- >= self.downloadable_result_settings.max_consecutive_file_download_retries
147
- ):
148
- self.fetch_need_retry = True
149
- return False
150
- self.num_consecutive_result_file_download_retries += 1
151
-
152
- # Re-submit handler run to thread pool and recursively check download status
153
- self.thread_pool.submit(handler.run)
154
- return self._check_if_download_successful(handler)
155
- else:
156
- self.fetch_need_retry = True
157
- return False
158
-
159
- self.num_consecutive_result_file_download_retries = 0
160
- self.fetch_need_retry = False
161
- return True
162
-
163
139
  def _shutdown_manager(self):
164
140
  # Clear download handlers and shutdown the thread pool
165
141
  self.download_handlers = []
@@ -1,15 +1,17 @@
1
1
  import logging
2
2
  from dataclasses import dataclass
3
-
4
3
  import requests
5
4
  import lz4.frame
6
5
  import threading
7
6
  import time
8
-
7
+ import os
8
+ import re
9
9
  from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
+ DEFAULT_CLOUD_FILE_TIMEOUT = int(os.getenv("DATABRICKS_CLOUD_FILE_TIMEOUT", 60))
14
+
13
15
 
14
16
  @dataclass
15
17
  class DownloadableResultSettings:
@@ -20,13 +22,17 @@ class DownloadableResultSettings:
20
22
  is_lz4_compressed (bool): Whether file is expected to be lz4 compressed.
21
23
  link_expiry_buffer_secs (int): Time in seconds to prevent download of a link before it expires. Default 0 secs.
22
24
  download_timeout (int): Timeout for download requests. Default 60 secs.
23
- max_consecutive_file_download_retries (int): Number of consecutive download retries before shutting down.
25
+ download_max_retries (int): Number of consecutive download retries before shutting down.
26
+ max_retries (int): Number of consecutive download retries before shutting down.
27
+ backoff_factor (int): Factor to increase wait time between retries.
28
+
24
29
  """
25
30
 
26
31
  is_lz4_compressed: bool
27
32
  link_expiry_buffer_secs: int = 0
28
- download_timeout: int = 60
29
- max_consecutive_file_download_retries: int = 0
33
+ download_timeout: int = DEFAULT_CLOUD_FILE_TIMEOUT
34
+ max_retries: int = 5
35
+ backoff_factor: int = 2
30
36
 
31
37
 
32
38
  class ResultSetDownloadHandler(threading.Thread):
@@ -57,16 +63,21 @@ class ResultSetDownloadHandler(threading.Thread):
57
63
  else None
58
64
  )
59
65
  try:
66
+ logger.debug(
67
+ f"waiting for at most {timeout} seconds for download file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
68
+ )
69
+
60
70
  if not self.is_download_finished.wait(timeout=timeout):
61
71
  self.is_download_timedout = True
62
- logger.debug(
63
- "Cloud fetch download timed out after {} seconds for link representing rows {} to {}".format(
64
- self.settings.download_timeout,
65
- self.result_link.startRowOffset,
66
- self.result_link.startRowOffset + self.result_link.rowCount,
67
- )
72
+ logger.error(
73
+ f"cloud fetch download timed out after {self.settings.download_timeout} seconds for link representing rows {self.result_link.startRowOffset} to {self.result_link.startRowOffset + self.result_link.rowCount}"
68
74
  )
69
- return False
75
+ # there are some weird cases when the is_download_finished is not set, but the file is downloaded successfully
76
+ return self.is_file_downloaded_successfully
77
+
78
+ logger.debug(
79
+ f"finish waiting for download file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
80
+ )
70
81
  except Exception as e:
71
82
  logger.error(e)
72
83
  return False
@@ -81,24 +92,36 @@ class ResultSetDownloadHandler(threading.Thread):
81
92
  """
82
93
  self._reset()
83
94
 
84
- # Check if link is already expired or is expiring
85
- if ResultSetDownloadHandler.check_link_expired(
86
- self.result_link, self.settings.link_expiry_buffer_secs
87
- ):
88
- self.is_link_expired = True
89
- return
95
+ try:
96
+ # Check if link is already expired or is expiring
97
+ if ResultSetDownloadHandler.check_link_expired(
98
+ self.result_link, self.settings.link_expiry_buffer_secs
99
+ ):
100
+ self.is_link_expired = True
101
+ return
90
102
 
91
- session = requests.Session()
92
- session.timeout = self.settings.download_timeout
103
+ logger.debug(
104
+ f"started to download file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
105
+ )
93
106
 
94
- try:
95
107
  # Get the file via HTTP request
96
- response = session.get(self.result_link.fileLink)
108
+ response = http_get_with_retry(
109
+ url=self.result_link.fileLink,
110
+ max_retries=self.settings.max_retries,
111
+ backoff_factor=self.settings.backoff_factor,
112
+ download_timeout=self.settings.download_timeout,
113
+ )
97
114
 
98
- if not response.ok:
99
- self.is_file_downloaded_successfully = False
115
+ if not response:
116
+ logger.error(
117
+ f"failed downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
118
+ )
100
119
  return
101
120
 
121
+ logger.debug(
122
+ f"success downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
123
+ )
124
+
102
125
  # Save (and decompress if needed) the downloaded file
103
126
  compressed_data = response.content
104
127
  decompressed_data = (
@@ -109,15 +132,22 @@ class ResultSetDownloadHandler(threading.Thread):
109
132
  self.result_file = decompressed_data
110
133
 
111
134
  # The size of the downloaded file should match the size specified from TSparkArrowResultLink
112
- self.is_file_downloaded_successfully = (
113
- len(self.result_file) == self.result_link.bytesNum
135
+ success = len(self.result_file) == self.result_link.bytesNum
136
+ logger.debug(
137
+ f"download successful file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
114
138
  )
139
+ self.is_file_downloaded_successfully = success
115
140
  except Exception as e:
141
+ logger.error(
142
+ f"exception downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
143
+ )
116
144
  logger.error(e)
117
145
  self.is_file_downloaded_successfully = False
118
146
 
119
147
  finally:
120
- session and session.close()
148
+ logger.debug(
149
+ f"signal finished file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
150
+ )
121
151
  # Awaken threads waiting for this to be true which signals the run is complete
122
152
  self.is_download_finished.set()
123
153
 
@@ -145,6 +175,7 @@ class ResultSetDownloadHandler(threading.Thread):
145
175
  link.expiryTime < current_time
146
176
  or link.expiryTime - current_time < expiry_buffer_secs
147
177
  ):
178
+ logger.debug("link expired")
148
179
  return True
149
180
  return False
150
181
 
@@ -171,3 +202,38 @@ class ResultSetDownloadHandler(threading.Thread):
171
202
  uncompressed_data += data
172
203
  start += num_bytes
173
204
  return uncompressed_data
205
+
206
+
207
def http_get_with_retry(url, max_retries=5, backoff_factor=2, download_timeout=60):
    """GET ``url``, retrying with exponential backoff until a 200 response arrives.

    Args:
        url: The URL to download.
        max_retries: Maximum number of attempts before giving up.
        backoff_factor: Base of the exponential wait; the wait after attempt
            ``i`` (zero-based) is ``backoff_factor ** i`` seconds.
        download_timeout: Timeout in seconds applied to each request.

    Returns:
        The ``requests.Response`` of the first attempt that returned HTTP 200,
        or ``None`` if every attempt failed.
    """
    pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
    mask = r"\1\2=<REDACTED>"

    # TODO: introduce connection pooling. I am seeing weird errors without it.
    for attempt in range(max_retries):
        try:
            # The context manager closes the session even if the request raises.
            with requests.Session() as session:
                # requests ignores a ``timeout`` attribute set on the session;
                # the timeout has to be passed with the request itself.
                response = session.get(url, timeout=download_timeout)

            # Only an exact HTTP 200 counts as success.
            if response.status_code == 200:
                return response
            logger.error(response)
        except requests.RequestException as e:
            # if this is not redacted, it will print the pre-signed URL
            logger.error(f"request failed with exception: {pattern.sub(mask, str(e))}")

        # Exponential backoff before the next attempt; there is nothing to
        # wait for once the last attempt has failed.
        if attempt + 1 < max_retries:
            wait_time = backoff_factor**attempt
            logger.info(f"retrying in {wait_time} seconds...")
            time.sleep(wait_time)

    logger.error(
        f"exceeded maximum number of retries ({max_retries}) while downloading result."
    )
    return None
@@ -115,3 +115,7 @@ class SessionAlreadyClosedError(RequestError):
115
115
 
116
116
  class CursorAlreadyClosedError(RequestError):
117
117
  """Thrown if CancelOperation receives a code 404. ThriftBackend should gracefully proceed as this is expected."""
118
+
119
+
120
class ResultSetDownloadError(RequestError):
    """Raised when downloading a result set fails."""
@@ -371,7 +371,9 @@ class ThriftBackend:
371
371
 
372
372
  this_method_name = getattr(method, "__name__")
373
373
 
374
- logger.debug("Sending request: {}(<REDACTED>)".format(this_method_name))
374
+ logger.debug(
375
+ "sending thrift request: {}(<REDACTED>)".format(this_method_name)
376
+ )
375
377
  unsafe_logger.debug("Sending request: {}".format(request))
376
378
 
377
379
  # These three lines are no-ops if the v3 retry policy is not in use
@@ -387,7 +389,9 @@ class ThriftBackend:
387
389
 
388
390
  # We need to call type(response) here because thrift doesn't implement __name__ attributes for thrift responses
389
391
  logger.debug(
390
- "Received response: {}(<REDACTED>)".format(type(response).__name__)
392
+ "received thrift response: {}(<REDACTED>)".format(
393
+ type(response).__name__
394
+ )
391
395
  )
392
396
  unsafe_logger.debug("Received response: {}".format(response))
393
397
  return response
@@ -741,6 +745,7 @@ class ThriftBackend:
741
745
  lz4_compressed = t_result_set_metadata_resp.lz4Compressed
742
746
  is_staging_operation = t_result_set_metadata_resp.isStagingOperation
743
747
  if direct_results and direct_results.resultSet:
748
+ logger.debug(f"received direct results")
744
749
  assert direct_results.resultSet.results.startRowOffset == 0
745
750
  assert direct_results.resultSetMetadata
746
751
 
@@ -753,6 +758,7 @@ class ThriftBackend:
753
758
  description=description,
754
759
  )
755
760
  else:
761
+ logger.debug(f"must fetch results")
756
762
  arrow_queue_opt = None
757
763
  return ExecuteResponse(
758
764
  arrow_queue=arrow_queue_opt,
@@ -816,6 +822,10 @@ class ThriftBackend:
816
822
  ):
817
823
  assert session_handle is not None
818
824
 
825
+ logger.debug(
826
+ f"executing: cloud fetch: {use_cloud_fetch}, max rows: {max_rows}, max bytes: {max_bytes}"
827
+ )
828
+
819
829
  spark_arrow_types = ttypes.TSparkArrowTypes(
820
830
  timestampAsArrow=self._use_arrow_native_timestamps,
821
831
  decimalAsArrow=self._use_arrow_native_decimals,
@@ -930,6 +940,7 @@ class ThriftBackend:
930
940
  return self._handle_execute_response(resp, cursor)
931
941
 
932
942
  def _handle_execute_response(self, resp, cursor):
943
+ logger.debug(f"got execute response")
933
944
  cursor.active_op_handle = resp.operationHandle
934
945
  self._check_direct_results_for_error(resp.directResults)
935
946
 
@@ -950,6 +961,7 @@ class ThriftBackend:
950
961
  arrow_schema_bytes,
951
962
  description,
952
963
  ):
964
+ logger.debug("started to fetch results")
953
965
  assert op_handle is not None
954
966
 
955
967
  req = ttypes.TFetchResultsReq(
@@ -5,6 +5,7 @@ from decimal import Decimal
5
5
  import datetime
6
6
  import decimal
7
7
  from enum import Enum
8
+ import logging
8
9
  import lz4.frame
9
10
  from typing import Dict, List, Union, Any
10
11
  import pyarrow
@@ -18,6 +19,7 @@ from databricks.sql.thrift_api.TCLIService.ttypes import (
18
19
  )
19
20
 
20
21
  BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128]
22
+ logger = logging.getLogger(__name__)
21
23
 
22
24
 
23
25
  class ResultSetQueue(ABC):
@@ -71,6 +73,9 @@ class ResultSetQueueFactory(ABC):
71
73
  )
72
74
  return ArrowQueue(converted_arrow_table, n_valid_rows)
73
75
  elif row_set_type == TSparkRowSetType.URL_BASED_SET:
76
+ logger.debug(
77
+ f"built cloud fetch queue for {len(t_row_set.resultLinks)} links."
78
+ )
74
79
  return CloudFetchQueue(
75
80
  arrow_schema_bytes,
76
81
  start_row_offset=t_row_set.startRowOffset,
@@ -146,6 +151,9 @@ class CloudFetchQueue(ResultSetQueue):
146
151
  self.lz4_compressed = lz4_compressed
147
152
  self.description = description
148
153
 
154
+ logger.debug(
155
+ f"creating cloud fetch queue for {len(result_links)} links and max_download_threads {self.max_download_threads}."
156
+ )
149
157
  self.download_manager = ResultFileDownloadManager(
150
158
  self.max_download_threads, self.lz4_compressed
151
159
  )