PyPI - databricks-sql-connector - Versions diffs - 3.1.1__tar.gz → 3.2.0__tar.gz - Mend

databricks-sql-connector 3.1.1 (tar.gz) → 3.2.0 (tar.gz)

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (58)

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/CHANGELOG.md RENAMED Viewed

@@ -1,6 +1,17 @@
 # Release History
-# x.x.x (TBD)
+# 3.2.0 (2024-06-06)
+- Update proxy authentication (databricks/databricks-sql-python#354 by @amir-haroun)
+- Relax `pyarrow` pin (databricks/databricks-sql-python#389 by @dhirschfeld)
+- Fix error logging in OAuth manager (databricks/databricks-sql-python#269 by @susodapop)
+- SQLAlchemy: enable delta.feature.allowColumnDefaults for all tables (databricks/databricks-sql-python#343 by @dhirschfeld)
+- Update `thrift` dependency (databricks/databricks-sql-python#397 by @m1n0)
+# 3.1.2 (2024-04-18)
+- Remove broken cookie code (#379)
+- Small typing fixes (#382, #384 thanks @wyattscarpenter)
 # 3.1.1 (2024-03-19)

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: databricks-sql-connector
-Version: 3.1.1
+Version: 3.2.0
 Summary: Databricks SQL Connector for Python
 License: Apache-2.0
 Author: Databricks
@@ -22,10 +22,10 @@ Requires-Dist: numpy (>=1.23.4) ; python_version >= "3.11"
 Requires-Dist: oauthlib (>=3.1.0,<4.0.0)
 Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
 Requires-Dist: pandas (>=1.2.5,<2.2.0) ; python_version >= "3.8"
-Requires-Dist: pyarrow (>=14.0.1,<15.0.0)
+Requires-Dist: pyarrow (>=14.0.1,<17)
 Requires-Dist: requests (>=2.18.1,<3.0.0)
 Requires-Dist: sqlalchemy (>=2.0.21) ; extra == "sqlalchemy" or extra == "alembic"
-Requires-Dist: thrift (>=0.16.0,<0.17.0)
+Requires-Dist: thrift (>=0.16.0,<0.21.0)
 Requires-Dist: urllib3 (>=1.26)
 Project-URL: Bug Tracker, https://github.com/databricks/databricks-sql-python/issues
 Project-URL: Homepage, https://github.com/databricks/databricks-sql-python

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "databricks-sql-connector"
-version = "3.1.1"
+version = "3.2.0"
 description = "Databricks SQL Connector for Python"
 authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
 license = "Apache-2.0"
@@ -10,11 +10,11 @@ include = ["CHANGELOG.md"]
 [tool.poetry.dependencies]
 python = "^3.8.0"
-thrift = "^0.16.0"
+thrift = ">=0.16.0,<0.21.0"
 pandas = [
     { version = ">=1.2.5,<2.2.0", python = ">=3.8" }
 ]
-pyarrow = "^14.0.1"
+pyarrow = ">=14.0.1,<17"
 lz4 = "^4.0.2"
 requests = "^2.18.1"

{databricks_sql_connector-3.1.1/src/databricks/sql/parameters → databricks_sql_connector-3.2.0/src/databricks}/py.typed RENAMED Viewed

File without changes

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/src/databricks/sql/__init__.py RENAMED Viewed

@@ -10,6 +10,12 @@ paramstyle = "named"
 import re
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    # Use this import purely for type annotations, a la https://mypy.readthedocs.io/en/latest/runtime_troubles.html#import-cycles
+    from .client import Connection
 class RedactUrlQueryParamsFilter(logging.Filter):
     pattern = re.compile(r"(\?|&)([\w-]+)=([^&]+)")
@@ -62,7 +68,7 @@ DATETIME = DBAPITypeObject("timestamp")
 DATE = DBAPITypeObject("date")
 ROWID = DBAPITypeObject()
-__version__ = "3.1.1"
+__version__ = "3.2.0"
 USER_AGENT_NAME = "PyDatabricksSqlConnector"
 # These two functions are pyhive legacy
@@ -78,7 +84,7 @@ def TimestampFromTicks(ticks):
     return Timestamp(*time.localtime(ticks)[:6])
-def connect(server_hostname, http_path, access_token=None, **kwargs):
+def connect(server_hostname, http_path, access_token=None, **kwargs) -> "Connection":
     from .client import Connection
     return Connection(server_hostname, http_path, access_token, **kwargs)

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/src/databricks/sql/auth/oauth.py RENAMED Viewed

@@ -125,7 +125,7 @@ class OAuthManager:
                     logger.info(f"Port {port} is in use")
                     last_error = e
             except Exception as e:
-                logger.error("unexpected error", e)
+                logger.error("unexpected error: %s", e)
         if self.redirect_port is None:
             logger.error(
                 f"Tried all the ports {self.port_range} for oauth redirect, but can't find free port"

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/src/databricks/sql/auth/thrift_http_client.py RENAMED Viewed

@@ -14,7 +14,7 @@ from http.client import HTTPResponse
 from io import BytesIO
 from urllib3 import HTTPConnectionPool, HTTPSConnectionPool, ProxyManager
+from urllib3.util import make_headers
 from databricks.sql.auth.retry import CommandType, DatabricksRetryPolicy
@@ -78,7 +78,7 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
             self.proxy_uri: str = proxy
             self.host = parsed.hostname
             self.port = parsed.port
-            self.proxy_auth = self.basic_proxy_auth_header(parsed)
+            self.proxy_auth = self.basic_proxy_auth_headers(parsed)
         else:
             self.realhost = self.realport = self.proxy_auth = None
@@ -120,7 +120,7 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
             proxy_manager = ProxyManager(
                 self.proxy_uri,
                 num_pools=1,
-                headers={"Proxy-Authorization": self.proxy_auth},
+                proxy_headers=self.proxy_auth,
             )
             self.__pool = proxy_manager.connection_from_host(
                 host=self.realhost,
@@ -167,7 +167,7 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
         }
         if self.using_proxy() and self.scheme == "http" and self.proxy_auth is not None:
-            headers["Proxy-Authorization" : self.proxy_auth]
+            headers.update(self.proxy_auth)
         if self.__custom_headers:
             custom_headers = {key: val for key, val in self.__custom_headers.items()}
@@ -189,20 +189,15 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
         self.message = self.__resp.reason
         self.headers = self.__resp.headers
-        # Saves the cookie sent by the server response
-        if "Set-Cookie" in self.headers:
-            self.setCustomHeaders(dict("Cookie", self.headers["Set-Cookie"]))
     @staticmethod
-    def basic_proxy_auth_header(proxy):
+    def basic_proxy_auth_headers(proxy):
         if proxy is None or not proxy.username:
             return None
         ap = "%s:%s" % (
             urllib.parse.unquote(proxy.username),
             urllib.parse.unquote(proxy.password),
         )
-        cr = base64.b64encode(ap.encode()).strip()
-        return "Basic " + six.ensure_str(cr)
+        return make_headers(proxy_basic_auth=ap)
     def set_retry_command_type(self, value: CommandType):
         """Pass the provided CommandType to the retry policy"""

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/src/databricks/sql/client.py RENAMED Viewed

@@ -271,7 +271,8 @@ class Connection:
         return value
-    def __enter__(self):
+    # The ideal return type for this method is perhaps Self, but that was not added until 3.11, and we support pre-3.11 pythons, currently.
+    def __enter__(self) -> "Connection":
         return self
     def __exit__(self, exc_type, exc_value, traceback):
@@ -409,7 +410,8 @@ class Cursor:
         self.escaper = ParamEscaper()
         self.lastrowid = None
-    def __enter__(self):
+    # The ideal return type for this method is perhaps Self, but that was not added until 3.11, and we support pre-3.11 pythons, currently.
+    def __enter__(self) -> "Cursor":
         return self
     def __exit__(self, exc_type, exc_value, traceback):

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/src/databricks/sql/cloudfetch/download_manager.py RENAMED Viewed

@@ -49,6 +49,11 @@ class ResultFileDownloadManager:
         for link in t_spark_arrow_result_links:
             if link.rowCount <= 0:
                 continue
+            logger.debug(
+                "ResultFileDownloadManager.add_file_links: start offset {}, row count: {}".format(
+                    link.startRowOffset, link.rowCount
+                )
+            )
             self.download_handlers.append(
                 ResultSetDownloadHandler(self.downloadable_result_settings, link)
             )
@@ -88,6 +93,12 @@ class ResultFileDownloadManager:
         # Check (and wait) for download status
         if self._check_if_download_successful(handler):
+            link = handler.result_link
+            logger.debug(
+                "ResultFileDownloadManager: file found for row index {}: start {}, row count: {}".format(
+                    next_row_offset, link.startRowOffset, link.rowCount
+                )
+            )
             # Buffer should be empty so set buffer to new ArrowQueue with result_file
             result = DownloadedFile(
                 handler.result_file,
@@ -97,15 +108,32 @@ class ResultFileDownloadManager:
             self.download_handlers.pop(idx)
             # Return True upon successful download to continue loop and not force a retry
             return result
+        else:
+            logger.debug(
+                "ResultFileDownloadManager: cannot find file for row index {}".format(
+                    next_row_offset
+                )
+            )
         # Download was not successful for next download item, force a retry
         self._shutdown_manager()
         return None
     def _remove_past_handlers(self, next_row_offset: int):
+        logger.debug(
+            "ResultFileDownloadManager: removing past handlers, current offset: {}".format(
+                next_row_offset
+            )
+        )
         # Any link in which its start to end range doesn't include the next row to be fetched does not need downloading
         i = 0
         while i < len(self.download_handlers):
             result_link = self.download_handlers[i].result_link
+            logger.debug(
+                "- checking result link: start {}, row count: {}, current offset: {}".format(
+                    result_link.startRowOffset, result_link.rowCount, next_row_offset
+                )
+            )
             if result_link.startRowOffset + result_link.rowCount > next_row_offset:
                 i += 1
                 continue
@@ -113,10 +141,16 @@ class ResultFileDownloadManager:
     def _schedule_downloads(self):
         # Schedule downloads for all download handlers if not already scheduled.
+        logger.debug("ResultFileDownloadManager: schedule downloads")
         for handler in self.download_handlers:
             if handler.is_download_scheduled:
                 continue
             try:
+                logger.debug(
+                    "- start: {}, row count: {}".format(
+                        handler.result_link.startRowOffset, handler.result_link.rowCount
+                    )
+                )
                 self.thread_pool.submit(handler.run)
             except Exception as e:
                 logger.error(e)
@@ -124,13 +158,28 @@ class ResultFileDownloadManager:
             handler.is_download_scheduled = True
     def _find_next_file_index(self, next_row_offset: int):
+        logger.debug(
+            "ResultFileDownloadManager: trying to find file for row {}".format(
+                next_row_offset
+            )
+        )
         # Get the handler index of the next file in order
         next_indices = [
             i
             for i, handler in enumerate(self.download_handlers)
             if handler.is_download_scheduled
+            # TODO: shouldn't `next_row_offset` be tested against the range, not just start row offset?
             and handler.result_link.startRowOffset == next_row_offset
         ]
+        for i in next_indices:
+            link = self.download_handlers[i].result_link
+            logger.debug(
+                "- found file: start {}, row count {}".format(
+                    link.startRowOffset, link.rowCount
+                )
+            )
         return next_indices[0] if len(next_indices) > 0 else None
     def _check_if_download_successful(self, handler: ResultSetDownloadHandler):

databricks_sql_connector-3.2.0/src/databricks/sql/thrift_api/__init__.py ADDED Viewed

File without changes

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/src/databricks/sql/utils.py RENAMED Viewed

@@ -156,6 +156,19 @@ class CloudFetchQueue(ResultSetQueue):
         self.lz4_compressed = lz4_compressed
         self.description = description
+        logger.debug(
+            "Initialize CloudFetch loader, row set start offset: {}, file list:".format(
+                start_row_offset
+            )
+        )
+        if result_links is not None:
+            for result_link in result_links:
+                logger.debug(
+                    "- start row offset: {}, row count: {}".format(
+                        result_link.startRowOffset, result_link.rowCount
+                    )
+                )
         self.download_manager = ResultFileDownloadManager(
             self.max_download_threads, self.lz4_compressed
         )
@@ -175,8 +188,10 @@ class CloudFetchQueue(ResultSetQueue):
             pyarrow.Table
         """
         if not self.table:
+            logger.debug("CloudFetchQueue: no more rows available")
             # Return empty pyarrow table to cause retry of fetch
             return self._create_empty_table()
+        logger.debug("CloudFetchQueue: trying to get {} next rows".format(num_rows))
         results = self.table.slice(0, 0)
         while num_rows > 0 and self.table:
             # Get remaining of num_rows or the rest of the current table, whichever is smaller
@@ -190,6 +205,8 @@ class CloudFetchQueue(ResultSetQueue):
                 self.table = self._create_next_table()
                 self.table_row_index = 0
             num_rows -= table_slice.num_rows
+        logger.debug("CloudFetchQueue: collected {} next rows".format(results.num_rows))
         return results
     def remaining_rows(self) -> pyarrow.Table:
@@ -214,11 +231,21 @@ class CloudFetchQueue(ResultSetQueue):
         return results
     def _create_next_table(self) -> Union[pyarrow.Table, None]:
+        logger.debug(
+            "CloudFetchQueue: Trying to get downloaded file for row {}".format(
+                self.start_row_index
+            )
+        )
         # Create next table by retrieving the logical next downloaded file, or return None to signal end of queue
         downloaded_file = self.download_manager.get_next_downloaded_file(
             self.start_row_index
         )
         if not downloaded_file:
+            logger.debug(
+                "CloudFetchQueue: Cannot find downloaded file for row {}".format(
+                    self.start_row_index
+                )
+            )
             # None signals no more Arrow tables can be built from the remaining handlers if any remain
             return None
         arrow_table = create_arrow_table_from_arrow_file(
@@ -228,12 +255,18 @@ class CloudFetchQueue(ResultSetQueue):
         # The server rarely prepares the exact number of rows requested by the client in cloud fetch.
         # Subsequently, we drop the extraneous rows in the last file if more rows are retrieved than requested
         if arrow_table.num_rows > downloaded_file.row_count:
-            self.start_row_index += downloaded_file.row_count
-            return arrow_table.slice(0, downloaded_file.row_count)
+            arrow_table = arrow_table.slice(0, downloaded_file.row_count)
         # At this point, whether the file has extraneous rows or not, the arrow table should have the correct num rows
         assert downloaded_file.row_count == arrow_table.num_rows
         self.start_row_index += arrow_table.num_rows
+        logger.debug(
+            "CloudFetchQueue: Found downloaded file, row count: {}, new start offset: {}".format(
+                arrow_table.num_rows, self.start_row_index
+            )
+        )
         return arrow_table
     def _create_empty_table(self) -> pyarrow.Table:

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/src/databricks/sqlalchemy/_ddl.py RENAMED Viewed

@@ -16,13 +16,15 @@ class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
 class DatabricksDDLCompiler(compiler.DDLCompiler):
     def post_create_table(self, table):
-        post = " USING DELTA"
+        post = [" USING DELTA"]
         if table.comment:
             comment = self.sql_compiler.render_literal_value(
                 table.comment, sqltypes.String()
             )
-            post += " COMMENT " + comment
-        return post
+            post.append("COMMENT " + comment)
+        post.append("TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'enabled')")
+        return "\n".join(post)
     def visit_unique_constraint(self, constraint, **kw):
         logger.warning("Databricks does not support unique constraints")

{databricks_sql_connector-3.1.1 → databricks_sql_connector-3.2.0}/src/databricks/sqlalchemy/test_local/test_ddl.py RENAMED Viewed

@@ -79,7 +79,8 @@ class TestTableCommentDDL(DDLTestBase):
     def test_create_table_with_comment(self, table_with_comment):
         stmt = CreateTable(table_with_comment)
         output = self.compile(stmt)
-        assert "USING DELTA COMMENT 'foobar'" in output
+        assert "USING DELTA" in output
+        assert "COMMENT 'foobar'" in output
     def test_alter_table_add_comment(self, table_without_comment: Table):
         table_without_comment.comment = "wireless mechanical keyboard"