PyPI - databricks-sql-connector - Versions diffs - 3.2.0__tar.gz → 3.3.0__tar.gz - Mend

databricks-sql-connector 3.2.0__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

{databricks_sql_connector-3.2.0 → databricks_sql_connector-3.3.0}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,19 @@
 # Release History
+# 3.3.0 (2024-07-18)
+- Don't retry requests that fail with HTTP code 401 (databricks/databricks-sql-python#408 by @Hodnebo)
+- Remove username/password (aka "basic") auth option (databricks/databricks-sql-python#409 by @jackyhu-db)
+- Refactor CloudFetch handler to fix numerous issues with it (databricks/databricks-sql-python#405 by @kravets-levko)
+- Add option to disable SSL verification for CloudFetch links (databricks/databricks-sql-python#414 by @kravets-levko)
+Databricks-managed passwords reached end of life on July 10, 2024. Therefore, Basic auth support was removed from
+the library. See https://docs.databricks.com/en/security/auth-authz/password-deprecation.html
+The existing option `_tls_no_verify=True` of `sql.connect(...)` will now also disable SSL cert verification
+(but not the SSL itself) for CloudFetch links. This option should be used as a workaround only, when other ways
+to fix SSL certificate errors didn't work.
 # 3.2.0 (2024-06-06)
 - Update proxy authentication (databricks/databricks-sql-python#354 by @amir-haroun)

{databricks_sql_connector-3.2.0 → databricks_sql_connector-3.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: databricks-sql-connector
-Version: 3.2.0
+Version: 3.3.0
 Summary: Databricks SQL Connector for Python
 License: Apache-2.0
 Author: Databricks
@@ -17,8 +17,8 @@ Provides-Extra: alembic
 Provides-Extra: sqlalchemy
 Requires-Dist: alembic (>=1.0.11,<2.0.0) ; extra == "alembic"
 Requires-Dist: lz4 (>=4.0.2,<5.0.0)
-Requires-Dist: numpy (>=1.16.6) ; python_version >= "3.8" and python_version < "3.11"
-Requires-Dist: numpy (>=1.23.4) ; python_version >= "3.11"
+Requires-Dist: numpy (>=1.16.6,<2.0.0) ; python_version >= "3.8" and python_version < "3.11"
+Requires-Dist: numpy (>=1.23.4,<2.0.0) ; python_version >= "3.11"
 Requires-Dist: oauthlib (>=3.1.0,<4.0.0)
 Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
 Requires-Dist: pandas (>=1.2.5,<2.2.0) ; python_version >= "3.8"

{databricks_sql_connector-3.2.0 → databricks_sql_connector-3.3.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "databricks-sql-connector"
-version = "3.2.0"
+version = "3.3.0"
 description = "Databricks SQL Connector for Python"
 authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
 license = "Apache-2.0"
@@ -20,8 +20,8 @@ lz4 = "^4.0.2"
 requests = "^2.18.1"
 oauthlib = "^3.1.0"
 numpy = [
-    { version = ">=1.16.6", python = ">=3.8,<3.11" },
-    { version = ">=1.23.4", python = ">=3.11" },
+    { version = "^1.16.6", python = ">=3.8,<3.11" },
+    { version = "^1.23.4", python = ">=3.11" },
 ]
 sqlalchemy = { version = ">=2.0.21", optional = true }
 openpyxl = "^3.0.10"
@@ -34,7 +34,7 @@ alembic = ["sqlalchemy", "alembic"]
 [tool.poetry.dev-dependencies]
 pytest = "^7.1.2"
-mypy = "^0.981"
+mypy = "^1.10.1"
 pylint = ">=2.12.0"
 black = "^22.3.0"
 pytest-dotenv = "^0.5.2"

{databricks_sql_connector-3.2.0 → databricks_sql_connector-3.3.0}/src/databricks/sql/__init__.py RENAMED Viewed

@@ -68,7 +68,7 @@ DATETIME = DBAPITypeObject("timestamp")
 DATE = DBAPITypeObject("date")
 ROWID = DBAPITypeObject()
-__version__ = "3.2.0"
+__version__ = "3.3.0"
 USER_AGENT_NAME = "PyDatabricksSqlConnector"
 # These two functions are pyhive legacy

{databricks_sql_connector-3.2.0 → databricks_sql_connector-3.3.0}/src/databricks/sql/auth/auth.py RENAMED Viewed

@@ -1,10 +1,9 @@
 from enum import Enum
-from typing import List
+from typing import Optional, List
 from databricks.sql.auth.authenticators import (
     AuthProvider,
     AccessTokenAuthProvider,
-    BasicAuthProvider,
     ExternalAuthProvider,
     DatabricksOAuthProvider,
 )
@@ -13,7 +12,7 @@ from databricks.sql.auth.authenticators import (
 class AuthType(Enum):
     DATABRICKS_OAUTH = "databricks-oauth"
     AZURE_OAUTH = "azure-oauth"
-    # other supported types (access_token, user/pass) can be inferred
+    # other supported types (access_token) can be inferred
     # we can add more types as needed later
@@ -21,21 +20,17 @@ class ClientContext:
     def __init__(
         self,
         hostname: str,
-        username: str = None,
-        password: str = None,
-        access_token: str = None,
-        auth_type: str = None,
-        oauth_scopes: List[str] = None,
-        oauth_client_id: str = None,
-        oauth_redirect_port_range: List[int] = None,
-        use_cert_as_auth: str = None,
-        tls_client_cert_file: str = None,
+        access_token: Optional[str] = None,
+        auth_type: Optional[str] = None,
+        oauth_scopes: Optional[List[str]] = None,
+        oauth_client_id: Optional[str] = None,
+        oauth_redirect_port_range: Optional[List[int]] = None,
+        use_cert_as_auth: Optional[str] = None,
+        tls_client_cert_file: Optional[str] = None,
         oauth_persistence=None,
         credentials_provider=None,
     ):
         self.hostname = hostname
-        self.username = username
-        self.password = password
         self.access_token = access_token
         self.auth_type = auth_type
         self.oauth_scopes = oauth_scopes
@@ -65,8 +60,6 @@ def get_auth_provider(cfg: ClientContext):
         )
     elif cfg.access_token is not None:
         return AccessTokenAuthProvider(cfg.access_token)
-    elif cfg.username is not None and cfg.password is not None:
-        return BasicAuthProvider(cfg.username, cfg.password)
     elif cfg.use_cert_as_auth and cfg.tls_client_cert_file:
         # no op authenticator. authentication is performed using ssl certificate outside of headers
         return AuthProvider()
@@ -100,12 +93,16 @@ def get_python_sql_connector_auth_provider(hostname: str, **kwargs):
     (client_id, redirect_port_range) = get_client_id_and_redirect_port(
         auth_type == AuthType.AZURE_OAUTH.value
     )
+    if kwargs.get("username") or kwargs.get("password"):
+        raise ValueError(
+            "Username/password authentication is no longer supported. "
+            "Please use OAuth or access token instead."
+        )
     cfg = ClientContext(
         hostname=normalize_host_name(hostname),
         auth_type=auth_type,
         access_token=kwargs.get("access_token"),
-        username=kwargs.get("_username"),
-        password=kwargs.get("_password"),
         use_cert_as_auth=kwargs.get("_use_cert_as_auth"),
         tls_client_cert_file=kwargs.get("_tls_client_cert_file"),
         oauth_scopes=PYSQL_OAUTH_SCOPES,

{databricks_sql_connector-3.2.0 → databricks_sql_connector-3.3.0}/src/databricks/sql/auth/authenticators.py RENAMED Viewed

@@ -43,21 +43,6 @@ class AccessTokenAuthProvider(AuthProvider):
         request_headers["Authorization"] = self.__authorization_header_value
-# Private API: this is an evolving interface and it will change in the future.
-# Please must not depend on it in your applications.
-class BasicAuthProvider(AuthProvider):
-    def __init__(self, username: str, password: str):
-        auth_credentials = f"{username}:{password}".encode("UTF-8")
-        auth_credentials_base64 = base64.standard_b64encode(auth_credentials).decode(
-            "UTF-8"
-        )
-        self.__authorization_header_value = f"Basic {auth_credentials_base64}"
-    def add_headers(self, request_headers: Dict[str, str]):
-        request_headers["Authorization"] = self.__authorization_header_value
 # Private API: this is an evolving interface and it will change in the future.
 # Please must not depend on it in your applications.
 class DatabricksOAuthProvider(AuthProvider):

{databricks_sql_connector-3.2.0 → databricks_sql_connector-3.3.0}/src/databricks/sql/auth/retry.py RENAMED Viewed

@@ -7,7 +7,7 @@ from typing import List, Optional, Tuple, Union
 # We only use this import for type hinting
 try:
     # If urllib3~=2.0 is installed
-    from urllib3 import BaseHTTPResponse  # type: ignore
+    from urllib3 import BaseHTTPResponse
 except ImportError:
     # If urllib3~=1.0 is installed
     from urllib3 import HTTPResponse as BaseHTTPResponse
@@ -129,7 +129,7 @@ class DatabricksRetryPolicy(Retry):
         urllib3_kwargs.update(**_urllib_kwargs_we_care_about)
         super().__init__(
-            **urllib3_kwargs,  # type: ignore
+            **urllib3_kwargs,
         )
     @classmethod
@@ -162,7 +162,9 @@ class DatabricksRetryPolicy(Retry):
         new_object.command_type = command_type
         return new_object
-    def new(self, **urllib3_incremented_counters: typing.Any) -> Retry:
+    def new(
+        self, **urllib3_incremented_counters: typing.Any
+    ) -> "DatabricksRetryPolicy":
         """This method is responsible for passing the entire Retry state to its next iteration.
         urllib3 calls Retry.new() between successive requests as part of its `.increment()` method
@@ -210,7 +212,7 @@ class DatabricksRetryPolicy(Retry):
             other=self.other,
             allowed_methods=self.allowed_methods,
             status_forcelist=self.status_forcelist,
-            backoff_factor=self.backoff_factor,  # type: ignore
+            backoff_factor=self.backoff_factor,
             raise_on_redirect=self.raise_on_redirect,
             raise_on_status=self.raise_on_status,
             history=self.history,
@@ -222,7 +224,7 @@ class DatabricksRetryPolicy(Retry):
         urllib3_init_params.update(**urllib3_incremented_counters)
         # Include urllib3's current state in our __init__ params
-        databricks_init_params["urllib3_kwargs"].update(**urllib3_init_params)  # type: ignore
+        databricks_init_params["urllib3_kwargs"].update(**urllib3_init_params)  # type: ignore[attr-defined]
         return type(self).__private_init__(
             retry_start_time=self._retry_start_time,
@@ -274,7 +276,7 @@ class DatabricksRetryPolicy(Retry):
                 f"Retry request would exceed Retry policy max retry duration of {self.stop_after_attempts_duration} seconds"
             )
-    def sleep_for_retry(self, response: BaseHTTPResponse) -> bool:  # type: ignore
+    def sleep_for_retry(self, response: BaseHTTPResponse) -> bool:
         """Sleeps for the duration specified in the response Retry-After header, if present
         A MaxRetryDurationError will be raised if doing so would exceed self.max_attempts_duration
@@ -325,7 +327,8 @@ class DatabricksRetryPolicy(Retry):
                default, this means ExecuteStatement is only retried for codes 429 and 503.
                This limit prevents automatically retrying non-idempotent commands that could
                be destructive.
-            5. The request received a 403 response, because this can never succeed.
+            5. The request received a 401 response, because this can never succeed.
+            6. The request received a 403 response, because this can never succeed.
         Q: What about OSErrors and Redirects?
@@ -339,6 +342,11 @@ class DatabricksRetryPolicy(Retry):
         if status_code == 200:
             return False, "200 codes are not retried"
+        if status_code == 401:
+            raise NonRecoverableNetworkError(
+                "Received 401 - UNAUTHORIZED. Confirm your authentication credentials."
+            )
         if status_code == 403:
             raise NonRecoverableNetworkError(
                 "Received 403 - FORBIDDEN. Confirm your authentication credentials."
@@ -349,7 +357,7 @@ class DatabricksRetryPolicy(Retry):
             raise NonRecoverableNetworkError("Received code 501 from server.")
         # Request failed and this method is not retryable. We only retry POST requests.
-        if not self._is_method_retryable(method):  # type: ignore
+        if not self._is_method_retryable(method):
             return False, "Only POST requests are retried"
         # Request failed with 404 and was a GetOperationStatus. This is not recoverable. Don't retry.

{databricks_sql_connector-3.2.0 → databricks_sql_connector-3.3.0}/src/databricks/sql/client.py RENAMED Viewed

@@ -59,7 +59,7 @@ class Connection:
         http_path: str,
         access_token: Optional[str] = None,
         http_headers: Optional[List[Tuple[str, str]]] = None,
-        session_configuration: Dict[str, Any] = None,
+        session_configuration: Optional[Dict[str, Any]] = None,
         catalog: Optional[str] = None,
         schema: Optional[str] = None,
         _use_arrow_native_complex_types: Optional[bool] = True,
@@ -163,16 +163,16 @@ class Connection:
         # Internal arguments in **kwargs:
         # _user_agent_entry
         #   Tag to add to User-Agent header. For use by partners.
-        # _username, _password
-        #   Username and password Basic authentication (no official support)
         # _use_cert_as_auth
-        #  Use a TLS cert instead of a token or username / password (internal use only)
+        #  Use a TLS cert instead of a token
         # _enable_ssl
         #  Connect over HTTP instead of HTTPS
         # _port
         #  Which port to connect to
         # _skip_routing_headers:
         #  Don't set routing headers if set to True (for use when connecting directly to server)
+        # _tls_no_verify
+        #   Set to True (Boolean) to completely disable SSL verification.
         # _tls_verify_hostname
         #   Set to False (Boolean) to disable SSL hostname verification, but check certificate.
         # _tls_trusted_ca_file
@@ -460,9 +460,9 @@ class Cursor:
         output: List[TDbsqlParameter] = []
         for p in params:
             if isinstance(p, DbsqlParameterBase):
-                output.append(p)  # type: ignore
+                output.append(p)
             else:
-                output.append(dbsql_parameter_from_primitive(value=p))  # type: ignore
+                output.append(dbsql_parameter_from_primitive(value=p))
         return output
@@ -640,7 +640,7 @@ class Cursor:
             )
     def _handle_staging_put(
-        self, presigned_url: str, local_file: str, headers: dict = None
+        self, presigned_url: str, local_file: str, headers: Optional[dict] = None
     ):
         """Make an HTTP PUT request
@@ -655,7 +655,7 @@ class Cursor:
         # fmt: off
         # Design borrowed from: https://stackoverflow.com/a/2342589/5093960
         OK = requests.codes.ok                  # 200
         CREATED = requests.codes.created        # 201
         ACCEPTED = requests.codes.accepted      # 202
@@ -675,7 +675,7 @@ class Cursor:
             )
     def _handle_staging_get(
-        self, local_file: str, presigned_url: str, headers: dict = None
+        self, local_file: str, presigned_url: str, headers: Optional[dict] = None
     ):
         """Make an HTTP GET request, create a local file with the received data
@@ -697,7 +697,9 @@ class Cursor:
         with open(local_file, "wb") as fp:
             fp.write(r.content)
-    def _handle_staging_remove(self, presigned_url: str, headers: dict = None):
+    def _handle_staging_remove(
+        self, presigned_url: str, headers: Optional[dict] = None
+    ):
         """Make an HTTP DELETE request to the presigned_url"""
         r = requests.delete(url=presigned_url, headers=headers)
@@ -757,7 +759,7 @@ class Cursor:
             normalized_parameters = self._normalize_tparametercollection(parameters)
             param_structure = self._determine_parameter_structure(normalized_parameters)
             transformed_operation = transform_paramstyle(
-                operation, normalized_parameters, param_structure  # type: ignore
+                operation, normalized_parameters, param_structure
             )
             prepared_operation, prepared_params = self._prepare_native_parameters(
                 transformed_operation, normalized_parameters, param_structure
@@ -861,7 +863,7 @@ class Cursor:
         catalog_name: Optional[str] = None,
         schema_name: Optional[str] = None,
         table_name: Optional[str] = None,
-        table_types: List[str] = None,
+        table_types: Optional[List[str]] = None,
     ) -> "Cursor":
         """
         Get tables corresponding to the catalog_name, schema_name and table_name.

databricks_sql_connector-3.3.0/src/databricks/sql/cloudfetch/download_manager.py ADDED Viewed

@@ -0,0 +1,107 @@
+import logging
+from ssl import SSLContext
+from concurrent.futures import ThreadPoolExecutor, Future
+from typing import List, Union
+from databricks.sql.cloudfetch.downloader import (
+    ResultSetDownloadHandler,
+    DownloadableResultSettings,
+    DownloadedFile,
+)
+from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
+logger = logging.getLogger(__name__)
+class ResultFileDownloadManager:
+    def __init__(
+        self,
+        links: List[TSparkArrowResultLink],
+        max_download_threads: int,
+        lz4_compressed: bool,
+        ssl_context: SSLContext,
+    ):
+        self._pending_links: List[TSparkArrowResultLink] = []
+        for link in links:
+            if link.rowCount <= 0:
+                continue
+            logger.debug(
+                "ResultFileDownloadManager: adding file link, start offset {}, row count: {}".format(
+                    link.startRowOffset, link.rowCount
+                )
+            )
+            self._pending_links.append(link)
+        self._download_tasks: List[Future[DownloadedFile]] = []
+        self._max_download_threads: int = max_download_threads
+        self._thread_pool = ThreadPoolExecutor(max_workers=self._max_download_threads)
+        self._downloadable_result_settings = DownloadableResultSettings(lz4_compressed)
+        self._ssl_context = ssl_context
+    def get_next_downloaded_file(
+        self, next_row_offset: int
+    ) -> Union[DownloadedFile, None]:
+        """
+        Get next file that starts at given offset.
+        This function gets the next downloaded file in which its rows start at the specified next_row_offset
+        in relation to the full result. File downloads are scheduled if not already, and once the correct
+        download handler is located, the function waits for the download status and returns the resulting file.
+        If there are no more downloads, a download was not successful, or the correct file could not be located,
+        this function shuts down the thread pool and returns None.
+        Args:
+            next_row_offset (int): The offset of the starting row of the next file we want data from.
+        """
+        # Make sure the download queue is always full
+        self._schedule_downloads()
+        # No more files to download from this batch of links
+        if len(self._download_tasks) == 0:
+            self._shutdown_manager()
+            return None
+        task = self._download_tasks.pop(0)
+        # Future's `result()` method will wait for the call to complete, and return
+        # the value returned by the call. If the call throws an exception - `result()`
+        # will throw the same exception
+        file = task.result()
+        if (next_row_offset < file.start_row_offset) or (
+            next_row_offset > file.start_row_offset + file.row_count
+        ):
+            logger.debug(
+                "ResultFileDownloadManager: file does not contain row {}, start {}, row count {}".format(
+                    next_row_offset, file.start_row_offset, file.row_count
+                )
+            )
+        return file
+    def _schedule_downloads(self):
+        """
+        While download queue has a capacity, peek pending links and submit them to thread pool.
+        """
+        logger.debug("ResultFileDownloadManager: schedule downloads")
+        while (len(self._download_tasks) < self._max_download_threads) and (
+            len(self._pending_links) > 0
+        ):
+            link = self._pending_links.pop(0)
+            logger.debug(
+                "- start: {}, row count: {}".format(link.startRowOffset, link.rowCount)
+            )
+            handler = ResultSetDownloadHandler(
+                settings=self._downloadable_result_settings,
+                link=link,
+                ssl_context=self._ssl_context,
+            )
+            task = self._thread_pool.submit(handler.run)
+            self._download_tasks.append(task)
+    def _shutdown_manager(self):
+        # Clear download handlers and shutdown the thread pool
+        self._pending_links = []
+        self._download_tasks = []
+        self._thread_pool.shutdown(wait=False)

databricks_sql_connector-3.3.0/src/databricks/sql/cloudfetch/downloader.py ADDED Viewed

@@ -0,0 +1,177 @@
+import logging
+from dataclasses import dataclass
+import requests
+from requests.adapters import HTTPAdapter, Retry
+from ssl import SSLContext, CERT_NONE
+import lz4.frame
+import time
+from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
+from databricks.sql.exc import Error
+logger = logging.getLogger(__name__)
+# TODO: Ideally, we should use a common retry policy (DatabricksRetryPolicy) for all the requests across the library.
+#       But DatabricksRetryPolicy should be updated first - currently it can work only with Thrift requests
+retryPolicy = Retry(
+    total=5,  # max retry attempts
+    backoff_factor=1,  # min delay, 1 second
+    # TODO: `backoff_max` is supported since `urllib3` v2.0.0, but we allow >= 1.26.
+    #       The default value (120 seconds) used since v1.26 looks reasonable enough
+    # backoff_max=60,  # max delay, 60 seconds
+    # retry all status codes below 100, 429 (Too Many Requests), and all codes above 500,
+    # excluding 501 Not implemented
+    status_forcelist=[*range(0, 101), 429, 500, *range(502, 1000)],
+)
+@dataclass
+class DownloadedFile:
+    """
+    Class for the result file and metadata.
+    Attributes:
+        file_bytes (bytes): Downloaded file in bytes.
+        start_row_offset (int): The offset of the starting row in relation to the full result.
+        row_count (int): Number of rows the file represents in the result.
+    """
+    file_bytes: bytes
+    start_row_offset: int
+    row_count: int
+@dataclass
+class DownloadableResultSettings:
+    """
+    Class for settings common to each download handler.
+    Attributes:
+        is_lz4_compressed (bool): Whether file is expected to be lz4 compressed.
+        link_expiry_buffer_secs (int): Time in seconds to prevent download of a link before it expires. Default 0 secs.
+        download_timeout (int): Timeout for download requests. Default 60 secs.
+        max_consecutive_file_download_retries (int): Number of consecutive download retries before shutting down.
+    """
+    is_lz4_compressed: bool
+    link_expiry_buffer_secs: int = 0
+    download_timeout: int = 60
+    max_consecutive_file_download_retries: int = 0
+class ResultSetDownloadHandler:
+    def __init__(
+        self,
+        settings: DownloadableResultSettings,
+        link: TSparkArrowResultLink,
+        ssl_context: SSLContext,
+    ):
+        self.settings = settings
+        self.link = link
+        self._ssl_context = ssl_context
+    def run(self) -> DownloadedFile:
+        """
+        Download the file described in the cloud fetch link.
+        This function checks if the link has or is expiring, gets the file via a requests session, decompresses the
+        file, and signals to waiting threads that the download is finished and whether it was successful.
+        """
+        logger.debug(
+            "ResultSetDownloadHandler: starting file download, offset {}, row count {}".format(
+                self.link.startRowOffset, self.link.rowCount
+            )
+        )
+        # Check if link is already expired or is expiring
+        ResultSetDownloadHandler._validate_link(
+            self.link, self.settings.link_expiry_buffer_secs
+        )
+        session = requests.Session()
+        session.mount("http://", HTTPAdapter(max_retries=retryPolicy))
+        session.mount("https://", HTTPAdapter(max_retries=retryPolicy))
+        ssl_verify = self._ssl_context.verify_mode != CERT_NONE
+        try:
+            # Get the file via HTTP request
+            response = session.get(
+                self.link.fileLink,
+                timeout=self.settings.download_timeout,
+                verify=ssl_verify,
+            )
+            response.raise_for_status()
+            # Save (and decompress if needed) the downloaded file
+            compressed_data = response.content
+            decompressed_data = (
+                ResultSetDownloadHandler._decompress_data(compressed_data)
+                if self.settings.is_lz4_compressed
+                else compressed_data
+            )
+            # The size of the downloaded file should match the size specified from TSparkArrowResultLink
+            if len(decompressed_data) != self.link.bytesNum:
+                logger.debug(
+                    "ResultSetDownloadHandler: downloaded file size {} does not match the expected value {}".format(
+                        len(decompressed_data), self.link.bytesNum
+                    )
+                )
+            logger.debug(
+                "ResultSetDownloadHandler: successfully downloaded file, offset {}, row count {}".format(
+                    self.link.startRowOffset, self.link.rowCount
+                )
+            )
+            return DownloadedFile(
+                decompressed_data,
+                self.link.startRowOffset,
+                self.link.rowCount,
+            )
+        finally:
+            if session:
+                session.close()
+    @staticmethod
+    def _validate_link(link: TSparkArrowResultLink, expiry_buffer_secs: int):
+        """
+        Check if a link has expired or will expire.
+        Expiry buffer can be set to avoid downloading files that has not expired yet when the function is called,
+        but may expire before the file has fully downloaded.
+        """
+        current_time = int(time.time())
+        if (
+            link.expiryTime <= current_time
+            or link.expiryTime - current_time <= expiry_buffer_secs
+        ):
+            raise Error("CloudFetch link has expired")
+    @staticmethod
+    def _decompress_data(compressed_data: bytes) -> bytes:
+        """
+        Decompress lz4 frame compressed data.
+        Decompresses data that has been lz4 compressed, either via the whole frame or by series of chunks.
+        """
+        uncompressed_data, bytes_read = lz4.frame.decompress(
+            compressed_data, return_bytes_read=True
+        )
+        # The last cloud fetch file of the entire result is commonly punctuated by frequent end-of-frame markers.
+        # Full frame decompression above will short-circuit, so chunking is necessary
+        if bytes_read < len(compressed_data):
+            d_context = lz4.frame.create_decompression_context()
+            start = 0
+            uncompressed_data = bytearray()
+            while start < len(compressed_data):
+                data, num_bytes, is_end = lz4.frame.decompress_chunk(
+                    d_context, compressed_data[start:]
+                )
+                uncompressed_data += data
+                start += num_bytes
+        return uncompressed_data

{databricks_sql_connector-3.2.0/src/databricks → databricks_sql_connector-3.3.0/src/databricks/sql/parameters}/py.typed RENAMED Viewed

File without changes

{databricks_sql_connector-3.2.0/src/databricks/sql/parameters → databricks_sql_connector-3.3.0/src/databricks/sql}/py.typed RENAMED Viewed

File without changes

databricks-sql-connector 3.2.0__tar.gz → 3.3.0__tar.gz

databricks-sql-connector 3.2.0__tar.gz → 3.3.0__tar.gz