databricks-sql-connector 2.9.3__tar.gz → 2.9.4b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/CHANGELOG.md +5 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/PKG-INFO +2 -1
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/pyproject.toml +1 -1
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/__init__.py +33 -1
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/cloudfetch/download_manager.py +8 -32
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/cloudfetch/downloader.py +93 -27
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/exc.py +4 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_backend.py +14 -2
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/utils.py +8 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/LICENSE +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/README.md +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/__init__.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/__init__.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/auth.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/authenticators.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/endpoint.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/oauth.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/retry.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/thrift_http_client.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/client.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/experimental/__init__.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/thrift_api/__init__.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/types.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/__init__.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/dialect/__init__.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/dialect/base.py +0 -0
- {databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sqlalchemy/dialect/compiler.py +0 -0
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
## 2.9.4 (Unreleased)
|
|
4
4
|
|
|
5
|
+
## 2.9.4b1 (2024-02-16)
|
|
6
|
+
|
|
7
|
+
- Fix: Cloud fetch file download errors (#356)
|
|
8
|
+
- Fix: Redact the URL query parameters from the urllib3.connectionpool logs (#341)
|
|
9
|
+
|
|
5
10
|
## 2.9.3 (2023-08-24)
|
|
6
11
|
|
|
7
12
|
- Fix: Connections failed when urllib3~=1.0.0 is installed (#206)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: databricks-sql-connector
|
|
3
|
-
Version: 2.9.3
|
|
3
|
+
Version: 2.9.4b1
|
|
4
4
|
Summary: Databricks SQL Connector for Python
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Databricks
|
|
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.8
|
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.9
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
16
|
Requires-Dist: alembic (>=1.0.11,<2.0.0)
|
|
16
17
|
Requires-Dist: lz4 (>=4.0.2,<5.0.0)
|
|
17
18
|
Requires-Dist: numpy (>=1.16.6) ; python_version >= "3.7" and python_version < "3.11"
|
{databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/__init__.py
RENAMED
|
@@ -7,6 +7,38 @@ apilevel = "2.0"
|
|
|
7
7
|
threadsafety = 1 # Threads may share the module, but not connections.
|
|
8
8
|
paramstyle = "pyformat" # Python extended format codes, e.g. ...WHERE name=%(name)s
|
|
9
9
|
|
|
10
|
+
import re
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class RedactUrlQueryParamsFilter(logging.Filter):
    """Logging filter that masks URL query-parameter values in log records.

    Every ``?name=value`` or ``&name=value`` pair found in a record's message,
    or in any of its string arguments, is rewritten to ``name=<REDACTED>`` so
    that values carried in a query string are not written to the logs.
    Non-string arguments are passed through untouched. The filter never drops
    a record.
    """

    # Captures the separator ("?" or "&") and the parameter name so both can be
    # kept; only the value (everything up to the next "&" or whitespace) is
    # replaced.
    pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
    mask = r"\1\2=<REDACTED>"

    def __init__(self):
        super().__init__()

    def redact(self, string):
        """Return ``str(string)`` with every query-parameter value masked."""
        return self.pattern.sub(self.mask, str(string))

    def _redact_if_str(self, value):
        """Redact ``value`` when it is a string; return anything else as-is."""
        return self.redact(value) if isinstance(value, str) else value

    def filter(self, record):
        """Redact the record's message and string arguments in place.

        Args:
            record: the ``logging.LogRecord`` about to be emitted.

        Returns:
            Always ``True``: the record is kept, only its content is altered.
        """
        record.msg = self.redact(record.msg)

        args = record.args
        if isinstance(args, dict):
            # Build a fresh dict rather than writing into the mapping the
            # caller passed to the logging call.
            record.args = {
                key: self._redact_if_str(value) for key, value in args.items()
            }
        elif args:
            record.args = tuple(self._redact_if_str(arg) for arg in args)
        # An empty tuple or None (no arguments) is left exactly as it is.

        return True
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
logging.getLogger("urllib3.connectionpool").addFilter(RedactUrlQueryParamsFilter())
|
|
10
42
|
|
|
11
43
|
class DBAPITypeObject(object):
|
|
12
44
|
def __init__(self, *values):
|
|
@@ -28,7 +60,7 @@ DATETIME = DBAPITypeObject("timestamp")
|
|
|
28
60
|
DATE = DBAPITypeObject("date")
|
|
29
61
|
ROWID = DBAPITypeObject()
|
|
30
62
|
|
|
31
|
-
__version__ = "2.9.3"
|
|
63
|
+
__version__ = "2.9.4b1"
|
|
32
64
|
USER_AGENT_NAME = "PyDatabricksSqlConnector"
|
|
33
65
|
|
|
34
66
|
# These two functions are pyhive legacy
|
|
@@ -8,6 +8,7 @@ from databricks.sql.cloudfetch.downloader import (
|
|
|
8
8
|
ResultSetDownloadHandler,
|
|
9
9
|
DownloadableResultSettings,
|
|
10
10
|
)
|
|
11
|
+
from databricks.sql.exc import ResultSetDownloadError
|
|
11
12
|
from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
|
|
12
13
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
@@ -34,8 +35,6 @@ class ResultFileDownloadManager:
|
|
|
34
35
|
self.download_handlers: List[ResultSetDownloadHandler] = []
|
|
35
36
|
self.thread_pool = ThreadPoolExecutor(max_workers=max_download_threads + 1)
|
|
36
37
|
self.downloadable_result_settings = DownloadableResultSettings(lz4_compressed)
|
|
37
|
-
self.fetch_need_retry = False
|
|
38
|
-
self.num_consecutive_result_file_download_retries = 0
|
|
39
38
|
|
|
40
39
|
def add_file_links(
|
|
41
40
|
self, t_spark_arrow_result_links: List[TSparkArrowResultLink]
|
|
@@ -81,13 +80,15 @@ class ResultFileDownloadManager:
|
|
|
81
80
|
|
|
82
81
|
# Find next file
|
|
83
82
|
idx = self._find_next_file_index(next_row_offset)
|
|
83
|
+
# is this correct?
|
|
84
84
|
if idx is None:
|
|
85
85
|
self._shutdown_manager()
|
|
86
|
+
logger.debug("could not find next file index")
|
|
86
87
|
return None
|
|
87
88
|
handler = self.download_handlers[idx]
|
|
88
89
|
|
|
89
90
|
# Check (and wait) for download status
|
|
90
|
-
if self._check_if_download_successful(handler):
|
|
91
|
+
if handler.is_file_download_successful():
|
|
91
92
|
# Buffer should be empty so set buffer to new ArrowQueue with result_file
|
|
92
93
|
result = DownloadedFile(
|
|
93
94
|
handler.result_file,
|
|
@@ -97,9 +98,11 @@ class ResultFileDownloadManager:
|
|
|
97
98
|
self.download_handlers.pop(idx)
|
|
98
99
|
# Return True upon successful download to continue loop and not force a retry
|
|
99
100
|
return result
|
|
100
|
-
# Download was not successful for next download item
|
|
101
|
+
# Download was not successful for next download item. Fail
|
|
101
102
|
self._shutdown_manager()
|
|
102
|
-
|
|
103
|
+
raise ResultSetDownloadError(
|
|
104
|
+
f"Download failed for result set starting at {next_row_offset}"
|
|
105
|
+
)
|
|
103
106
|
|
|
104
107
|
def _remove_past_handlers(self, next_row_offset: int):
|
|
105
108
|
# Any link in which its start to end range doesn't include the next row to be fetched does not need downloading
|
|
@@ -133,33 +136,6 @@ class ResultFileDownloadManager:
|
|
|
133
136
|
]
|
|
134
137
|
return next_indices[0] if len(next_indices) > 0 else None
|
|
135
138
|
|
|
136
|
-
def _check_if_download_successful(self, handler: ResultSetDownloadHandler):
|
|
137
|
-
# Check (and wait until download finishes) if download was successful
|
|
138
|
-
if not handler.is_file_download_successful():
|
|
139
|
-
if handler.is_link_expired:
|
|
140
|
-
self.fetch_need_retry = True
|
|
141
|
-
return False
|
|
142
|
-
elif handler.is_download_timedout:
|
|
143
|
-
# Consecutive file retries should not exceed threshold in settings
|
|
144
|
-
if (
|
|
145
|
-
self.num_consecutive_result_file_download_retries
|
|
146
|
-
>= self.downloadable_result_settings.max_consecutive_file_download_retries
|
|
147
|
-
):
|
|
148
|
-
self.fetch_need_retry = True
|
|
149
|
-
return False
|
|
150
|
-
self.num_consecutive_result_file_download_retries += 1
|
|
151
|
-
|
|
152
|
-
# Re-submit handler run to thread pool and recursively check download status
|
|
153
|
-
self.thread_pool.submit(handler.run)
|
|
154
|
-
return self._check_if_download_successful(handler)
|
|
155
|
-
else:
|
|
156
|
-
self.fetch_need_retry = True
|
|
157
|
-
return False
|
|
158
|
-
|
|
159
|
-
self.num_consecutive_result_file_download_retries = 0
|
|
160
|
-
self.fetch_need_retry = False
|
|
161
|
-
return True
|
|
162
|
-
|
|
163
139
|
def _shutdown_manager(self):
|
|
164
140
|
# Clear download handlers and shutdown the thread pool
|
|
165
141
|
self.download_handlers = []
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
|
|
4
3
|
import requests
|
|
5
4
|
import lz4.frame
|
|
6
5
|
import threading
|
|
7
6
|
import time
|
|
8
|
-
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
9
|
from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
|
+
DEFAULT_CLOUD_FILE_TIMEOUT = int(os.getenv("DATABRICKS_CLOUD_FILE_TIMEOUT", 60))
|
|
14
|
+
|
|
13
15
|
|
|
14
16
|
@dataclass
|
|
15
17
|
class DownloadableResultSettings:
|
|
@@ -20,13 +22,17 @@ class DownloadableResultSettings:
|
|
|
20
22
|
is_lz4_compressed (bool): Whether file is expected to be lz4 compressed.
|
|
21
23
|
link_expiry_buffer_secs (int): Time in seconds to prevent download of a link before it expires. Default 0 secs.
|
|
22
24
|
download_timeout (int): Timeout for download requests. Default 60 secs.
|
|
23
|
-
|
|
25
|
+
download_max_retries (int): Number of consecutive download retries before shutting down.
|
|
26
|
+
max_retries (int): Number of consecutive download retries before shutting down.
|
|
27
|
+
backoff_factor (int): Factor to increase wait time between retries.
|
|
28
|
+
|
|
24
29
|
"""
|
|
25
30
|
|
|
26
31
|
is_lz4_compressed: bool
|
|
27
32
|
link_expiry_buffer_secs: int = 0
|
|
28
|
-
download_timeout: int = 60
|
|
29
|
-
|
|
33
|
+
download_timeout: int = DEFAULT_CLOUD_FILE_TIMEOUT
|
|
34
|
+
max_retries: int = 5
|
|
35
|
+
backoff_factor: int = 2
|
|
30
36
|
|
|
31
37
|
|
|
32
38
|
class ResultSetDownloadHandler(threading.Thread):
|
|
@@ -57,16 +63,21 @@ class ResultSetDownloadHandler(threading.Thread):
|
|
|
57
63
|
else None
|
|
58
64
|
)
|
|
59
65
|
try:
|
|
66
|
+
logger.debug(
|
|
67
|
+
f"waiting for at most {timeout} seconds for download file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
|
|
68
|
+
)
|
|
69
|
+
|
|
60
70
|
if not self.is_download_finished.wait(timeout=timeout):
|
|
61
71
|
self.is_download_timedout = True
|
|
62
|
-
logger.
|
|
63
|
-
"
|
|
64
|
-
self.settings.download_timeout,
|
|
65
|
-
self.result_link.startRowOffset,
|
|
66
|
-
self.result_link.startRowOffset + self.result_link.rowCount,
|
|
67
|
-
)
|
|
72
|
+
logger.error(
|
|
73
|
+
f"cloud fetch download timed out after {self.settings.download_timeout} seconds for link representing rows {self.result_link.startRowOffset} to {self.result_link.startRowOffset + self.result_link.rowCount}"
|
|
68
74
|
)
|
|
69
|
-
|
|
75
|
+
# there are some weird cases when the is_download_finished is not set, but the file is downloaded successfully
|
|
76
|
+
return self.is_file_downloaded_successfully
|
|
77
|
+
|
|
78
|
+
logger.debug(
|
|
79
|
+
f"finish waiting for download file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
|
|
80
|
+
)
|
|
70
81
|
except Exception as e:
|
|
71
82
|
logger.error(e)
|
|
72
83
|
return False
|
|
@@ -81,24 +92,36 @@ class ResultSetDownloadHandler(threading.Thread):
|
|
|
81
92
|
"""
|
|
82
93
|
self._reset()
|
|
83
94
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
95
|
+
try:
|
|
96
|
+
# Check if link is already expired or is expiring
|
|
97
|
+
if ResultSetDownloadHandler.check_link_expired(
|
|
98
|
+
self.result_link, self.settings.link_expiry_buffer_secs
|
|
99
|
+
):
|
|
100
|
+
self.is_link_expired = True
|
|
101
|
+
return
|
|
90
102
|
|
|
91
|
-
|
|
92
|
-
|
|
103
|
+
logger.debug(
|
|
104
|
+
f"started to download file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
|
|
105
|
+
)
|
|
93
106
|
|
|
94
|
-
try:
|
|
95
107
|
# Get the file via HTTP request
|
|
96
|
-
response =
|
|
108
|
+
response = http_get_with_retry(
|
|
109
|
+
url=self.result_link.fileLink,
|
|
110
|
+
max_retries=self.settings.max_retries,
|
|
111
|
+
backoff_factor=self.settings.backoff_factor,
|
|
112
|
+
download_timeout=self.settings.download_timeout,
|
|
113
|
+
)
|
|
97
114
|
|
|
98
|
-
if not response
|
|
99
|
-
|
|
115
|
+
if not response:
|
|
116
|
+
logger.error(
|
|
117
|
+
f"failed downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
|
|
118
|
+
)
|
|
100
119
|
return
|
|
101
120
|
|
|
121
|
+
logger.debug(
|
|
122
|
+
f"success downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
|
|
123
|
+
)
|
|
124
|
+
|
|
102
125
|
# Save (and decompress if needed) the downloaded file
|
|
103
126
|
compressed_data = response.content
|
|
104
127
|
decompressed_data = (
|
|
@@ -109,15 +132,22 @@ class ResultSetDownloadHandler(threading.Thread):
|
|
|
109
132
|
self.result_file = decompressed_data
|
|
110
133
|
|
|
111
134
|
# The size of the downloaded file should match the size specified from TSparkArrowResultLink
|
|
112
|
-
self.
|
|
113
|
-
|
|
135
|
+
success = len(self.result_file) == self.result_link.bytesNum
|
|
136
|
+
logger.debug(
|
|
137
|
+
f"download successful file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
|
|
114
138
|
)
|
|
139
|
+
self.is_file_downloaded_successfully = success
|
|
115
140
|
except Exception as e:
|
|
141
|
+
logger.error(
|
|
142
|
+
f"exception downloading file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
|
|
143
|
+
)
|
|
116
144
|
logger.error(e)
|
|
117
145
|
self.is_file_downloaded_successfully = False
|
|
118
146
|
|
|
119
147
|
finally:
|
|
120
|
-
|
|
148
|
+
logger.debug(
|
|
149
|
+
f"signal finished file: startRow {self.result_link.startRowOffset}, rowCount {self.result_link.rowCount}, endRow {self.result_link.startRowOffset + self.result_link.rowCount}"
|
|
150
|
+
)
|
|
121
151
|
# Awaken threads waiting for this to be true which signals the run is complete
|
|
122
152
|
self.is_download_finished.set()
|
|
123
153
|
|
|
@@ -145,6 +175,7 @@ class ResultSetDownloadHandler(threading.Thread):
|
|
|
145
175
|
link.expiryTime < current_time
|
|
146
176
|
or link.expiryTime - current_time < expiry_buffer_secs
|
|
147
177
|
):
|
|
178
|
+
logger.debug("link expired")
|
|
148
179
|
return True
|
|
149
180
|
return False
|
|
150
181
|
|
|
@@ -171,3 +202,38 @@ class ResultSetDownloadHandler(threading.Thread):
|
|
|
171
202
|
uncompressed_data += data
|
|
172
203
|
start += num_bytes
|
|
173
204
|
return uncompressed_data
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def http_get_with_retry(url, max_retries=5, backoff_factor=2, download_timeout=60):
    """GET ``url``, retrying with exponential backoff until a 200 response.

    Args:
        url: the URL to download.
        max_retries: maximum number of GET attempts before giving up.
        backoff_factor: base of the exponential wait; the wait after attempt
            ``i`` (0-based) is ``backoff_factor ** i`` seconds.
        download_timeout: timeout in seconds passed to each GET request.

    Returns:
        The ``requests.Response`` of the first attempt that returned status
        200, or ``None`` if no attempt did.

    Only ``requests.RequestException`` is caught and retried; any other
    exception propagates to the caller.
    """
    attempts = 0
    pattern = re.compile(r"(\?|&)([\w-]+)=([^&\s]+)")
    mask = r"\1\2=<REDACTED>"

    # TODO: introduce connection pooling. I am seeing weird errors without it.
    while attempts < max_retries:
        try:
            # A fresh session per attempt, always closed by the context manager.
            with requests.Session() as session:
                # The timeout must be given per request: requests ignores a
                # ``timeout`` attribute set on the Session object, which would
                # leave the download without any timeout at all.
                response = session.get(url, timeout=download_timeout)

            # Only an exact 200 counts as success; anything else is retried.
            if response.status_code == 200:
                return response
            logger.error(response)
        except requests.RequestException as e:
            # if this is not redacted, it will print the pre-signed URL
            logger.error(
                f"request failed with exception: {re.sub(pattern, mask, str(e))}"
            )

        attempts += 1
        if attempts < max_retries:
            # Exponential backoff before the next attempt. Skipped after the
            # final attempt, where there is nothing left to wait for.
            wait_time = backoff_factor ** (attempts - 1)
            logger.info(f"retrying in {wait_time} seconds...")
            time.sleep(wait_time)

    logger.error(
        f"exceeded maximum number of retries ({max_retries}) while downloading result."
    )
    return None
|
{databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/exc.py
RENAMED
|
@@ -115,3 +115,7 @@ class SessionAlreadyClosedError(RequestError):
|
|
|
115
115
|
|
|
116
116
|
class CursorAlreadyClosedError(RequestError):
|
|
117
117
|
"""Thrown if CancelOperation receives a code 404. ThriftBackend should gracefully proceed as this is expected."""
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class ResultSetDownloadError(RequestError):
    """Thrown if there was an error during the download of a result set.

    Raised by the cloud fetch download manager when the file holding the next
    rows of a result set was not downloaded successfully.
    """
|
|
@@ -371,7 +371,9 @@ class ThriftBackend:
|
|
|
371
371
|
|
|
372
372
|
this_method_name = getattr(method, "__name__")
|
|
373
373
|
|
|
374
|
-
logger.debug(
|
|
374
|
+
logger.debug(
|
|
375
|
+
"sending thrift request: {}(<REDACTED>)".format(this_method_name)
|
|
376
|
+
)
|
|
375
377
|
unsafe_logger.debug("Sending request: {}".format(request))
|
|
376
378
|
|
|
377
379
|
# These three lines are no-ops if the v3 retry policy is not in use
|
|
@@ -387,7 +389,9 @@ class ThriftBackend:
|
|
|
387
389
|
|
|
388
390
|
# We need to call type(response) here because thrift doesn't implement __name__ attributes for thrift responses
|
|
389
391
|
logger.debug(
|
|
390
|
-
"
|
|
392
|
+
"received thrift response: {}(<REDACTED>)".format(
|
|
393
|
+
type(response).__name__
|
|
394
|
+
)
|
|
391
395
|
)
|
|
392
396
|
unsafe_logger.debug("Received response: {}".format(response))
|
|
393
397
|
return response
|
|
@@ -741,6 +745,7 @@ class ThriftBackend:
|
|
|
741
745
|
lz4_compressed = t_result_set_metadata_resp.lz4Compressed
|
|
742
746
|
is_staging_operation = t_result_set_metadata_resp.isStagingOperation
|
|
743
747
|
if direct_results and direct_results.resultSet:
|
|
748
|
+
logger.debug(f"received direct results")
|
|
744
749
|
assert direct_results.resultSet.results.startRowOffset == 0
|
|
745
750
|
assert direct_results.resultSetMetadata
|
|
746
751
|
|
|
@@ -753,6 +758,7 @@ class ThriftBackend:
|
|
|
753
758
|
description=description,
|
|
754
759
|
)
|
|
755
760
|
else:
|
|
761
|
+
logger.debug(f"must fetch results")
|
|
756
762
|
arrow_queue_opt = None
|
|
757
763
|
return ExecuteResponse(
|
|
758
764
|
arrow_queue=arrow_queue_opt,
|
|
@@ -816,6 +822,10 @@ class ThriftBackend:
|
|
|
816
822
|
):
|
|
817
823
|
assert session_handle is not None
|
|
818
824
|
|
|
825
|
+
logger.debug(
|
|
826
|
+
f"executing: cloud fetch: {use_cloud_fetch}, max rows: {max_rows}, max bytes: {max_bytes}"
|
|
827
|
+
)
|
|
828
|
+
|
|
819
829
|
spark_arrow_types = ttypes.TSparkArrowTypes(
|
|
820
830
|
timestampAsArrow=self._use_arrow_native_timestamps,
|
|
821
831
|
decimalAsArrow=self._use_arrow_native_decimals,
|
|
@@ -930,6 +940,7 @@ class ThriftBackend:
|
|
|
930
940
|
return self._handle_execute_response(resp, cursor)
|
|
931
941
|
|
|
932
942
|
def _handle_execute_response(self, resp, cursor):
|
|
943
|
+
logger.debug(f"got execute response")
|
|
933
944
|
cursor.active_op_handle = resp.operationHandle
|
|
934
945
|
self._check_direct_results_for_error(resp.directResults)
|
|
935
946
|
|
|
@@ -950,6 +961,7 @@ class ThriftBackend:
|
|
|
950
961
|
arrow_schema_bytes,
|
|
951
962
|
description,
|
|
952
963
|
):
|
|
964
|
+
logger.debug("started to fetch results")
|
|
953
965
|
assert op_handle is not None
|
|
954
966
|
|
|
955
967
|
req = ttypes.TFetchResultsReq(
|
{databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/utils.py
RENAMED
|
@@ -5,6 +5,7 @@ from decimal import Decimal
|
|
|
5
5
|
import datetime
|
|
6
6
|
import decimal
|
|
7
7
|
from enum import Enum
|
|
8
|
+
import logging
|
|
8
9
|
import lz4.frame
|
|
9
10
|
from typing import Dict, List, Union, Any
|
|
10
11
|
import pyarrow
|
|
@@ -18,6 +19,7 @@ from databricks.sql.thrift_api.TCLIService.ttypes import (
|
|
|
18
19
|
)
|
|
19
20
|
|
|
20
21
|
BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128]
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
class ResultSetQueue(ABC):
|
|
@@ -71,6 +73,9 @@ class ResultSetQueueFactory(ABC):
|
|
|
71
73
|
)
|
|
72
74
|
return ArrowQueue(converted_arrow_table, n_valid_rows)
|
|
73
75
|
elif row_set_type == TSparkRowSetType.URL_BASED_SET:
|
|
76
|
+
logger.debug(
|
|
77
|
+
f"built cloud fetch queue for {len(t_row_set.resultLinks)} links."
|
|
78
|
+
)
|
|
74
79
|
return CloudFetchQueue(
|
|
75
80
|
arrow_schema_bytes,
|
|
76
81
|
start_row_offset=t_row_set.startRowOffset,
|
|
@@ -146,6 +151,9 @@ class CloudFetchQueue(ResultSetQueue):
|
|
|
146
151
|
self.lz4_compressed = lz4_compressed
|
|
147
152
|
self.description = description
|
|
148
153
|
|
|
154
|
+
logger.debug(
|
|
155
|
+
f"creating cloud fetch queue for {len(result_links)} links and max_download_threads {self.max_download_threads}."
|
|
156
|
+
)
|
|
149
157
|
self.download_manager = ResultFileDownloadManager(
|
|
150
158
|
self.max_download_threads, self.lz4_compressed
|
|
151
159
|
)
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/auth.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/oauth.py
RENAMED
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/auth/retry.py
RENAMED
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-2.9.3 → databricks_sql_connector-2.9.4b1}/src/databricks/sql/types.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|