databricks-sdk 0.67.0__py3-none-any.whl → 0.69.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of databricks-sdk might be problematic.

Files changed (49)
  1. databricks/sdk/__init__.py +14 -10
  2. databricks/sdk/_base_client.py +4 -1
  3. databricks/sdk/common/lro.py +17 -0
  4. databricks/sdk/common/types/__init__.py +0 -0
  5. databricks/sdk/common/types/fieldmask.py +39 -0
  6. databricks/sdk/config.py +62 -14
  7. databricks/sdk/credentials_provider.py +61 -12
  8. databricks/sdk/dbutils.py +5 -1
  9. databricks/sdk/errors/parser.py +8 -3
  10. databricks/sdk/mixins/files.py +1156 -111
  11. databricks/sdk/mixins/files_utils.py +293 -0
  12. databricks/sdk/oidc_token_supplier.py +80 -0
  13. databricks/sdk/retries.py +102 -2
  14. databricks/sdk/service/_internal.py +93 -1
  15. databricks/sdk/service/agentbricks.py +1 -1
  16. databricks/sdk/service/apps.py +264 -1
  17. databricks/sdk/service/billing.py +2 -3
  18. databricks/sdk/service/catalog.py +1026 -540
  19. databricks/sdk/service/cleanrooms.py +3 -3
  20. databricks/sdk/service/compute.py +21 -33
  21. databricks/sdk/service/dashboards.py +7 -3
  22. databricks/sdk/service/database.py +3 -2
  23. databricks/sdk/service/dataquality.py +1145 -0
  24. databricks/sdk/service/files.py +2 -1
  25. databricks/sdk/service/iam.py +2 -1
  26. databricks/sdk/service/iamv2.py +1 -1
  27. databricks/sdk/service/jobs.py +6 -9
  28. databricks/sdk/service/marketplace.py +3 -1
  29. databricks/sdk/service/ml.py +3 -1
  30. databricks/sdk/service/oauth2.py +1 -1
  31. databricks/sdk/service/pipelines.py +5 -6
  32. databricks/sdk/service/provisioning.py +544 -655
  33. databricks/sdk/service/qualitymonitorv2.py +1 -1
  34. databricks/sdk/service/serving.py +3 -1
  35. databricks/sdk/service/settings.py +5 -2
  36. databricks/sdk/service/settingsv2.py +1 -1
  37. databricks/sdk/service/sharing.py +12 -3
  38. databricks/sdk/service/sql.py +305 -70
  39. databricks/sdk/service/tags.py +1 -1
  40. databricks/sdk/service/vectorsearch.py +3 -1
  41. databricks/sdk/service/workspace.py +70 -17
  42. databricks/sdk/version.py +1 -1
  43. {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/METADATA +4 -2
  44. databricks_sdk-0.69.0.dist-info/RECORD +84 -0
  45. databricks_sdk-0.67.0.dist-info/RECORD +0 -79
  46. {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/WHEEL +0 -0
  47. {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/licenses/LICENSE +0 -0
  48. {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/licenses/NOTICE +0 -0
  49. {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,7 @@ from __future__ import annotations
  import base64
  import datetime
  import logging
+ import math
  import os
  import pathlib
  import platform
@@ -13,8 +14,13 @@ import xml.etree.ElementTree as ET
  from abc import ABC, abstractmethod
  from collections import deque
  from collections.abc import Iterator
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from dataclasses import dataclass
  from datetime import timedelta
  from io import BytesIO
+ from queue import Empty, Full, Queue
+ from tempfile import mkstemp
+ from threading import Event, Thread
  from types import TracebackType
  from typing import (TYPE_CHECKING, AnyStr, BinaryIO, Callable, Generator,
                      Iterable, Optional, Type, Union)
@@ -27,12 +33,14 @@ from requests import RequestException
  from .._base_client import _BaseClient, _RawResponse, _StreamingResponse
  from .._property import _cached_property
  from ..config import Config
- from ..errors import AlreadyExists, NotFound
+ from ..errors import AlreadyExists, NotFound, PermissionDenied
  from ..errors.mapper import _error_mapper
  from ..retries import retried
  from ..service import files
  from ..service._internal import _escape_multi_segment_path_parameter
  from ..service.files import DownloadResponse
+ from .files_utils import (CreateDownloadUrlResponse, _ConcatenatedInputStream,
+                           _PresignedUrlDistributor)
 
  if TYPE_CHECKING:
      from _typeshed import Self
@@ -710,18 +718,70 @@ class DbfsExt(files.DbfsAPI):
          p.delete(recursive=recursive)
 
 
+ class FallbackToUploadUsingFilesApi(Exception):
+     """Custom exception that signals to fallback to FilesAPI for upload"""
+
+     def __init__(self, buffer, message):
+         super().__init__(message)
+         self.buffer = buffer
+
+
+ class FallbackToDownloadUsingFilesApi(Exception):
+     """Custom exception that signals to fallback to FilesAPI for download"""
+
+     def __init__(self, message):
+         super().__init__(message)
+
+
+ @dataclass
+ class UploadStreamResult:
+     """Result of an upload from stream operation. Currently empty, but can be extended in the future."""
+
+
+ @dataclass
+ class UploadFileResult:
+     """Result of an upload from file operation. Currently empty, but can be extended in the future."""
+
+
+ @dataclass
+ class DownloadFileResult:
+     """Result of a download to file operation. Currently empty, but can be extended in the future."""
+
+
  class FilesExt(files.FilesAPI):
      __doc__ = files.FilesAPI.__doc__
 
      # note that these error codes are retryable only for idempotent operations
-     _RETRYABLE_STATUS_CODES = [408, 429, 500, 502, 503, 504]
+     _RETRYABLE_STATUS_CODES: list[int] = [408, 429, 500, 502, 503, 504]
+
+     @dataclass(frozen=True)
+     class _UploadContext:
+         target_path: str
+         """The absolute remote path of the target file, e.g. /Volumes/path/to/your/file."""
+         overwrite: Optional[bool]
+         """If true, an existing file will be overwritten. When unspecified, default behavior of the cloud storage provider is performed."""
+         part_size: int
+         """The size of each part in bytes for multipart upload."""
+         batch_size: int
+         """The number of urls to request in a single batch."""
+         content_length: Optional[int] = None
+         """The total size of the content being uploaded, if known."""
+         source_file_path: Optional[str] = None
+         """The local path of the file being uploaded, if applicable."""
+         use_parallel: Optional[bool] = None
+         """If true, the upload will be performed using multiple threads."""
+         parallelism: Optional[int] = None
+         """The number of threads to use for parallel upload, if applicable."""
 
      def __init__(self, api_client, config: Config):
          super().__init__(api_client)
          self._config = config.copy()
          self._multipart_upload_read_ahead_bytes = 1
 
-     def download(self, file_path: str) -> DownloadResponse:
+     def download(
+         self,
+         file_path: str,
+     ) -> DownloadResponse:
          """Download a file.
 
          Downloads a file of any size. The file contents are the response body.
@@ -736,48 +796,462 @@ class FilesExt(files.FilesAPI):
736
796
 
737
797
  :returns: :class:`DownloadResponse`
738
798
  """
799
+ if self._config.disable_experimental_files_api_client:
800
+ _LOG.info("Experimental files API client is disabled; using the original download method.")
801
+ return super().download(file_path)
739
802
 
740
803
  initial_response: DownloadResponse = self._open_download_stream(
741
- file_path=file_path,
742
- start_byte_offset=0,
743
- if_unmodified_since_timestamp=None,
804
+ file_path=file_path, start_byte_offset=0, if_unmodified_since_timestamp=None
744
805
  )
745
806
 
746
807
  wrapped_response = self._wrap_stream(file_path, initial_response)
747
808
  initial_response.contents._response = wrapped_response
748
809
  return initial_response
749
810
 
750
- def upload(self, file_path: str, contents: BinaryIO, *, overwrite: Optional[bool] = None):
751
- """Upload a file.
811
+ def download_to(
812
+ self,
813
+ file_path: str,
814
+ destination: str,
815
+ *,
816
+ overwrite: bool = True,
817
+ use_parallel: bool = False,
818
+ parallelism: Optional[int] = None,
819
+ ) -> DownloadFileResult:
820
+ """Download a file to a local path.
821
+
822
+ :param file_path: str
823
+ The remote path of the file, e.g. /Volumes/path/to/your/file
824
+ :param destination: str
825
+ The local path where the file will be saved.
826
+ :param overwrite: bool
827
+ If true, an existing file will be overwritten. When not specified, assumed True.
828
+ :param use_parallel: bool
829
+ If true, the download will be performed using multiple threads.
830
+ :param parallelism: int
831
+ The number of parallel threads to use for downloading. If not specified, defaults to the number of CPU cores.
832
+
833
+ :returns: :class:`DownloadFileResult`
834
+ """
835
+ if self._config.disable_experimental_files_api_client:
836
+ raise NotImplementedError(
837
+ "Experimental files API features are disabled, download_to is not supported. Please use download instead."
838
+ )
839
+
840
+ # The existence of the target file is checked before starting the download. This is a best-effort check
841
+ # to avoid overwriting an existing file. However, there is nothing preventing a file from being created
842
+ # at the destination path after this check and before the file is written, and no way to prevent other
843
+ # actor from writing to the destination path concurrently.
844
+ if not overwrite and os.path.exists(destination):
845
+ raise FileExistsError(destination)
846
+ if use_parallel:
847
+ # Parallel download is not supported for Windows due to the limit of only one open file handle
848
+ # for writing. If parallel download is requested on Windows, fall back to sequential download with
849
+ # a warning.
850
+ if platform.system() == "Windows":
851
+ _LOG.warning("Parallel download is not supported on Windows. Falling back to sequential download.")
852
+ self._sequential_download_to_file(destination, remote_path=file_path)
853
+ return DownloadFileResult()
854
+ if parallelism is None:
855
+ parallelism = self._config.files_ext_parallel_download_default_parallelism
856
+ if parallelism < 1 or parallelism > 64:
857
+ raise ValueError("parallelism must be between 1 and 64")
858
+ self._parallel_download_with_fallback(file_path, destination, parallelism=parallelism)
859
+ else:
860
+ self._sequential_download_to_file(destination, remote_path=file_path)
861
+ return DownloadFileResult()
862
+
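For orientation, a minimal usage sketch of the download_to API added above. This is an editor's illustration, not part of the package diff; it assumes the FilesExt mixin is reachable as WorkspaceClient().files (as in prior SDK releases) and uses made-up paths.

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# Sequential download (the default): streams the remote file straight to disk.
w.files.download_to(
    file_path="/Volumes/main/default/vol/report.csv",
    destination="/tmp/report.csv",
    overwrite=True,
)

# Parallel download: fetches byte ranges on multiple threads and assembles
# them in a temp file; falls back to sequential on Windows or small files.
w.files.download_to(
    file_path="/Volumes/main/default/vol/big.parquet",
    destination="/tmp/big.parquet",
    use_parallel=True,
    parallelism=8,  # validated to be between 1 and 64
)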
863
+ def _parallel_download_with_fallback(self, remote_path: str, destination: str, parallelism: int) -> None:
864
+ """Download a file in parallel to a local path.
865
+ This method first tries to use the Presigned URL for parallel download. If it fails due to permission issues,
866
+ it falls back to using Files API.
867
+
868
+ :param remote_path: str
869
+ The remote path of the file, e.g. /Volumes/path/to/your/file
870
+ :param destination: str
871
+ The local path where the file will be saved.
872
+ :param parallelism: int
873
+ The number of parallel threads to use for downloading.
874
+
875
+ :returns: None
876
+ """
877
+ try:
878
+ self._parallel_download_presigned_url(remote_path, destination, parallelism)
879
+ except FallbackToDownloadUsingFilesApi as e:
880
+ _LOG.info("Falling back to Files API download due to permission issues with Presigned URL: %s", e)
881
+ self._parallel_download_files_api(remote_path, destination, parallelism)
882
+
883
+ def _sequential_download_to_file(
884
+ self, destination: str, remote_path: str, last_modified: Optional[str] = None
885
+ ) -> None:
886
+ with open(destination, "wb") as f:
887
+ response = self._open_download_stream(
888
+ file_path=remote_path,
889
+ start_byte_offset=0,
890
+ if_unmodified_since_timestamp=last_modified,
891
+ )
892
+ wrapped_response = self._wrap_stream(remote_path, response, 0)
893
+ response.contents._response = wrapped_response
894
+ shutil.copyfileobj(response.contents, f)
895
+
896
+ def _do_parallel_download(
897
+ self, remote_path: str, destination: str, parallelism: int, download_chunk: Callable
898
+ ) -> None:
899
+
900
+ file_info = self.get_metadata(remote_path)
901
+ file_size = file_info.content_length
902
+ last_modified = file_info.last_modified
903
+ # If the file is smaller than the threshold, do not use parallel download.
904
+ if file_size <= self._config.files_ext_parallel_download_min_file_size:
905
+ self._sequential_download_to_file(destination, remote_path, last_modified)
906
+ return
907
+ part_size = self._config.files_ext_parallel_download_default_part_size
908
+ part_count = int(math.ceil(file_size / part_size))
909
+
910
+ fd, temp_file = mkstemp()
911
+ # We are preallocate the file size to the same as the remote file to avoid seeking beyond the file size.
912
+ os.truncate(temp_file, file_size)
913
+ os.close(fd)
914
+ try:
915
+ aborted = Event()
916
+
917
+ def wrapped_download_chunk(start: int, end: int, last_modified: Optional[str], temp_file: str) -> None:
918
+ if aborted.is_set():
919
+ return
920
+ additional_headers = {
921
+ "Range": f"bytes={start}-{end}",
922
+ "If-Unmodified-Since": last_modified,
923
+ }
924
+ try:
925
+ contents = download_chunk(additional_headers)
926
+ with open(temp_file, "r+b") as f:
927
+ f.seek(start)
928
+ shutil.copyfileobj(contents, f)
929
+ except Exception as e:
930
+ aborted.set()
931
+ raise e
932
+
933
+ with ThreadPoolExecutor(max_workers=parallelism) as executor:
934
+ futures = []
935
+ # Start the threads to download parts of the file.
936
+ for i in range(part_count):
937
+ start = i * part_size
938
+ end = min(start + part_size - 1, file_size - 1)
939
+ futures.append(executor.submit(wrapped_download_chunk, start, end, last_modified, temp_file))
940
+
941
+ # Wait for all threads to complete and check for exceptions.
942
+ for future in as_completed(futures):
943
+ exception = future.exception()
944
+ if exception:
945
+ raise exception
946
+ # Finally, move the temp file to the destination.
947
+ shutil.move(temp_file, destination)
948
+ finally:
949
+ if os.path.exists(temp_file):
950
+ os.remove(temp_file)
951
+
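To make the range arithmetic in _do_parallel_download concrete, a small worked sketch (the file and part sizes below are assumed numbers, not values from the diff):

import math

file_size = 10 * 1024 * 1024 + 1  # hypothetical remote file: 10 MiB + 1 byte
part_size = 4 * 1024 * 1024       # hypothetical files_ext_parallel_download_default_part_size
part_count = int(math.ceil(file_size / part_size))  # -> 3 parts

for i in range(part_count):
    start = i * part_size
    end = min(start + part_size - 1, file_size - 1)
    # Each worker requests "Range: bytes=<start>-<end>" and writes the chunk at
    # offset <start> of the preallocated temp file, as wrapped_download_chunk does above.
    print(f"part {i}: bytes={start}-{end}")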
952
+ def _parallel_download_presigned_url(self, remote_path: str, destination: str, parallelism: int) -> None:
953
+ """Download a file in parallel to a local path using presigned URLs.
954
+
955
+ :param remote_path: str
956
+ The remote path of the file, e.g. /Volumes/path/to/your/file
957
+ :param destination: str
958
+ The local path where the file will be saved.
959
+ :param parallelism: int
960
+ The number of parallel threads to use for downloading.
961
+
962
+ :returns: None
963
+ """
964
+
965
+ cloud_session = self._create_cloud_provider_session()
966
+ url_distributor = _PresignedUrlDistributor(lambda: self._create_download_url(remote_path))
752
967
 
753
- Uploads a file. The file contents should be sent as the request body as raw bytes (an
754
- octet stream); do not encode or otherwise modify the bytes before sending. The contents of the
755
- resulting file will be exactly the bytes sent in the request body. If the request is successful, there
756
- is no response body.
968
+ def download_chunk(additional_headers: dict[str, str]) -> BinaryIO:
969
+ retry_count = 0
970
+ while retry_count < self._config.files_ext_parallel_download_max_retries:
971
+ url_and_header, version = url_distributor.get_url()
972
+
973
+ headers = {**url_and_header.headers, **additional_headers}
974
+
975
+ def get_content() -> requests.Response:
976
+ return cloud_session.get(url_and_header.url, headers=headers)
977
+
978
+ raw_resp = self._retry_cloud_idempotent_operation(get_content)
979
+
980
+ if FilesExt._is_url_expired_response(raw_resp):
981
+ _LOG.info("Presigned URL expired, fetching a new one.")
982
+ url_distributor.invalidate_url(version)
983
+ retry_count += 1
984
+ continue
985
+ elif raw_resp.status_code == 403:
986
+ raise FallbackToDownloadUsingFilesApi("Received 403 Forbidden from presigned URL")
987
+
988
+ raw_resp.raise_for_status()
989
+ return BytesIO(raw_resp.content)
990
+ raise ValueError("Exceeded maximum retries for downloading with presigned URL: URL expired too many times")
991
+
992
+ self._do_parallel_download(remote_path, destination, parallelism, download_chunk)
993
+
994
+ def _parallel_download_files_api(self, remote_path: str, destination: str, parallelism: int) -> None:
995
+ """Download a file in parallel to a local path using the Files API.
996
+
997
+ :param remote_path: str
998
+ The remote path of the file, e.g. /Volumes/path/to/your/file
999
+ :param destination: str
1000
+ The local path where the file will be saved.
1001
+ :param parallelism: int
1002
+ The number of parallel threads to use for downloading.
1003
+
1004
+ :returns: None
1005
+ """
1006
+
1007
+ def download_chunk(additional_headers: dict[str, str]) -> BinaryIO:
1008
+ raw_response: dict = self._api.do(
1009
+ method="GET",
1010
+ path=f"/api/2.0/fs/files{remote_path}",
1011
+ headers=additional_headers,
1012
+ raw=True,
1013
+ )
1014
+ return raw_response["contents"]
1015
+
1016
+ self._do_parallel_download(remote_path, destination, parallelism, download_chunk)
1017
+
1018
+ def _get_optimized_performance_parameters_for_upload(
1019
+ self, content_length: Optional[int], part_size_overwrite: Optional[int]
1020
+ ) -> (int, int):
1021
+ """Get optimized part size and batch size for upload based on content length and provided part size.
1022
+
1023
+ Returns tuple of (part_size, batch_size).
1024
+ """
1025
+ chosen_part_size = None
1026
+
1027
+ # 1. decide on the part size
1028
+ if part_size_overwrite is not None: # If a part size is provided, we use it directly after validation.
1029
+ if part_size_overwrite > self._config.files_ext_multipart_upload_max_part_size:
1030
+ raise ValueError(
1031
+ f"Part size {part_size_overwrite} exceeds maximum allowed size {self._config.files_ext_multipart_upload_max_part_size} bytes."
1032
+ )
1033
+ chosen_part_size = part_size_overwrite
1034
+ _LOG.debug(f"Using provided part size: {chosen_part_size} bytes")
1035
+ else: # If no part size is provided, we will optimize based on the content length.
1036
+ if content_length is not None:
1037
+ # Choosing the smallest part size that allows for a maximum of 100 parts.
1038
+ for part_size in self._config.files_ext_multipart_upload_part_size_options:
1039
+ part_num = (content_length + part_size - 1) // part_size
1040
+ if part_num <= 100:
1041
+ chosen_part_size = part_size
1042
+ _LOG.debug(
1043
+ f"Optimized part size for upload: {chosen_part_size} bytes for content length {content_length} bytes"
1044
+ )
1045
+ break
1046
+ if chosen_part_size is None: # If no part size was chosen, we default to the maximum allowed part size.
1047
+ chosen_part_size = self._config.files_ext_multipart_upload_max_part_size
1048
+
1049
+ # Use defaults if not determined yet
1050
+ if chosen_part_size is None:
1051
+ chosen_part_size = self._config.files_ext_multipart_upload_default_part_size
1052
+
1053
+ # 2. decide on the batch size
1054
+ if content_length is not None and chosen_part_size is not None:
1055
+ part_num = (content_length + chosen_part_size - 1) // chosen_part_size
1056
+ chosen_batch_size = int(
1057
+ math.ceil(math.sqrt(part_num))
1058
+ ) # Using the square root of the number of parts as a heuristic for batch size.
1059
+ else:
1060
+ chosen_batch_size = self._config.files_ext_multipart_upload_batch_url_count
1061
+
1062
+ return chosen_part_size, chosen_batch_size
1063
+
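A worked example of the part-size and batch-size heuristic above. The concrete option list stands in for files_ext_multipart_upload_part_size_options, whose real defaults are configuration values not shown in this diff:

import math

content_length = 3 * 1024**3  # assume a 3 GiB upload
part_size_options = [8 * 1024**2, 16 * 1024**2, 32 * 1024**2, 64 * 1024**2]  # assumed values

# Smallest option that keeps the upload at or below 100 parts (mirrors the loop above).
part_size = next(p for p in part_size_options if (content_length + p - 1) // p <= 100)  # -> 32 MiB

part_num = (content_length + part_size - 1) // part_size  # -> 96 parts
batch_size = int(math.ceil(math.sqrt(part_num)))           # -> 10 URLs requested per batch

print(part_size, part_num, batch_size)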
1064
+ def upload(
1065
+ self,
1066
+ file_path: str,
1067
+ content: BinaryIO,
1068
+ *,
1069
+ overwrite: Optional[bool] = None,
1070
+ part_size: Optional[int] = None,
1071
+ use_parallel: bool = True,
1072
+ parallelism: Optional[int] = None,
1073
+ ) -> UploadStreamResult:
1074
+ """
1075
+ Upload a file with stream interface.
1076
+
1077
+ :param file_path: str
1078
+ The absolute remote path of the target file, e.g. /Volumes/path/to/your/file
1079
+ :param content: BinaryIO
1080
+ The contents of the file to upload. This must be a BinaryIO stream.
1081
+ :param overwrite: bool (optional)
1082
+ If true, an existing file will be overwritten. When not specified, assumed True.
1083
+ :param part_size: int (optional)
1084
+ If set, multipart upload will use this value as the size of each uploaded part.
1085
+ :param use_parallel: bool (optional)
1086
+ If true, the upload will be performed using multiple threads. Be aware that this will consume more memory
1087
+ because multiple parts will be buffered in memory before being uploaded. The amount of memory used is proportional
1088
+ to `parallelism * part_size`.
1089
+ If false, the upload will be performed in a single thread.
1090
+ Default is True.
1091
+ :param parallelism: int (optional)
1092
+ The number of threads to use for parallel uploads. This is only used if `use_parallel` is True.
1093
+
1094
+ :returns: :class:`UploadStreamResult`
1095
+ """
1096
+
1097
+ if self._config.disable_experimental_files_api_client:
1098
+ _LOG.info("Experimental files API client is disabled; using the original upload method.")
1099
+ super().upload(file_path=file_path, contents=content, overwrite=overwrite)
1100
+ return UploadStreamResult()
1101
+
1102
+ _LOG.debug(f"Uploading file from BinaryIO stream")
1103
+ if parallelism is not None and not use_parallel:
1104
+ raise ValueError("parallelism can only be set if use_parallel is True")
1105
+ if parallelism is None and use_parallel:
1106
+ parallelism = self._config.files_ext_multipart_upload_default_parallelism
1107
+
1108
+ # Determine content length if the stream is seekable
1109
+ content_length = None
1110
+ if content.seekable():
1111
+ _LOG.debug(f"Uploading using seekable mode")
1112
+ # If the stream is seekable, we can read its size.
1113
+ content.seek(0, os.SEEK_END)
1114
+ content_length = content.tell()
1115
+ content.seek(0)
1116
+
1117
+ # Get optimized part size and batch size based on content length and provided part size
1118
+ optimized_part_size, optimized_batch_size = self._get_optimized_performance_parameters_for_upload(
1119
+ content_length, part_size
1120
+ )
1121
+
1122
+ # Create context with all final parameters
1123
+ ctx = self._UploadContext(
1124
+ target_path=file_path,
1125
+ overwrite=overwrite,
1126
+ part_size=optimized_part_size,
1127
+ batch_size=optimized_batch_size,
1128
+ content_length=content_length,
1129
+ use_parallel=use_parallel,
1130
+ parallelism=parallelism,
1131
+ )
1132
+
1133
+ _LOG.debug(
1134
+ f"Upload context: part_size={ctx.part_size}, batch_size={ctx.batch_size}, content_length={ctx.content_length}"
1135
+ )
1136
+
1137
+ if ctx.use_parallel:
1138
+ self._parallel_upload_from_stream(ctx, content)
1139
+ return UploadStreamResult()
1140
+ elif ctx.content_length is not None:
1141
+ self._upload_single_thread_with_known_size(ctx, content)
1142
+ return UploadStreamResult()
1143
+ else:
1144
+ _LOG.debug(f"Uploading using non-seekable mode")
1145
+ # If the stream is not seekable, we cannot determine its size.
1146
+ # We will use a multipart upload.
1147
+ _LOG.debug(f"Using multipart upload for non-seekable input stream of unknown size for file {file_path}")
1148
+ self._single_thread_multipart_upload(ctx, content)
1149
+ return UploadStreamResult()
1150
+
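An illustrative call to the streaming upload above (editor's sketch, not part of the diff; it assumes the mixin is exposed as WorkspaceClient().files and that the target volume exists):

import io
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# A seekable stream lets the client size parts and URL batches automatically;
# non-seekable streams fall back to the single-threaded multipart path.
data = io.BytesIO(b"hello, volumes " * 1024)

w.files.upload(
    "/Volumes/main/default/vol/hello.bin",
    data,
    overwrite=True,
    use_parallel=True,  # the default; set False for a single-threaded upload
    parallelism=4,      # only valid together with use_parallel=True
)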
1151
+ def upload_from(
1152
+ self,
1153
+ file_path: str,
1154
+ source_path: str,
1155
+ *,
1156
+ overwrite: Optional[bool] = None,
1157
+ part_size: Optional[int] = None,
1158
+ use_parallel: bool = True,
1159
+ parallelism: Optional[int] = None,
1160
+ ) -> UploadFileResult:
1161
+ """Upload a file directly from a local path.
757
1162
 
758
1163
  :param file_path: str
759
1164
  The absolute remote path of the target file.
760
- :param contents: BinaryIO
1165
+ :param source_path: str
1166
+ The local path of the file to upload. This must be a path to a local file.
1167
+ :param part_size: int
1168
+ The size of each part in bytes for multipart upload. If not set, a part size is chosen automatically based on the file size.
761
1169
  :param overwrite: bool (optional)
762
1170
  If true, an existing file will be overwritten. When not specified, assumed True.
763
- """
1171
+ :param use_parallel: bool (optional)
1172
+ If true, the upload will be performed using multiple threads. Default is True.
1173
+ :param parallelism: int (optional)
1174
+ The number of threads to use for parallel uploads. This is only used if `use_parallel` is True.
1175
+ If not specified, it defaults to config.files_ext_multipart_upload_default_parallelism.
764
1176
 
765
- # Upload empty and small files with one-shot upload.
766
- pre_read_buffer = contents.read(self._config.multipart_upload_min_stream_size)
767
- if len(pre_read_buffer) < self._config.multipart_upload_min_stream_size:
768
- _LOG.debug(
769
- f"Using one-shot upload for input stream of size {len(pre_read_buffer)} below {self._config.multipart_upload_min_stream_size} bytes"
1177
+ :returns: :class:`UploadFileResult`
1178
+ """
1179
+ if self._config.disable_experimental_files_api_client:
1180
+ raise NotImplementedError(
1181
+ "Experimental files API features are disabled, upload_from is not supported. Please use upload instead."
770
1182
  )
771
- return super().upload(file_path=file_path, contents=BytesIO(pre_read_buffer), overwrite=overwrite)
772
1183
 
1184
+ _LOG.debug(f"Uploading file from local path: {source_path}")
1185
+
1186
+ if parallelism is not None and not use_parallel:
1187
+ raise ValueError("parallelism can only be set if use_parallel is True")
1188
+ if parallelism is None and use_parallel:
1189
+ parallelism = self._config.files_ext_multipart_upload_default_parallelism
1190
+ # Get the file size
1191
+ file_size = os.path.getsize(source_path)
1192
+
1193
+ # Get optimized part size and batch size based on content length and provided part size
1194
+ optimized_part_size, optimized_batch_size = self._get_optimized_performance_parameters_for_upload(
1195
+ file_size, part_size
1196
+ )
1197
+
1198
+ # Create context with all final parameters
1199
+ ctx = self._UploadContext(
1200
+ target_path=file_path,
1201
+ overwrite=overwrite,
1202
+ part_size=optimized_part_size,
1203
+ batch_size=optimized_batch_size,
1204
+ content_length=file_size,
1205
+ source_file_path=source_path,
1206
+ use_parallel=use_parallel,
1207
+ parallelism=parallelism,
1208
+ )
1209
+ if ctx.use_parallel:
1210
+ self._parallel_upload_from_file(ctx)
1211
+ return UploadFileResult()
1212
+ else:
1213
+ with open(source_path, "rb") as f:
1214
+ self._upload_single_thread_with_known_size(ctx, f)
1215
+ return UploadFileResult()
1216
+
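And a matching sketch for the file-based variant above (illustrative only; note that the remote target path comes first, then the local source path):

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# Part size and URL batch size are derived from the local file size unless
# part_size is passed explicitly.
w.files.upload_from(
    "/Volumes/main/default/vol/dataset.parquet",  # remote target
    "/local/data/dataset.parquet",                # local source
    overwrite=True,
    use_parallel=True,
)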
1217
+ def _upload_single_thread_with_known_size(self, ctx: _UploadContext, contents: BinaryIO) -> None:
1218
+ """Upload a file with a known size."""
1219
+ if ctx.content_length < self._config.files_ext_multipart_upload_min_stream_size:
1220
+ _LOG.debug(f"Using single-shot upload for input stream of size {ctx.content_length} bytes")
1221
+ return self._single_thread_single_shot_upload(ctx, contents)
1222
+ else:
1223
+ _LOG.debug(f"Using multipart upload for input stream of size {ctx.content_length} bytes")
1224
+ return self._single_thread_multipart_upload(ctx, contents)
1225
+
1226
+ def _single_thread_single_shot_upload(self, ctx: _UploadContext, contents: BinaryIO) -> None:
1227
+ """Upload a file with a known size."""
1228
+ _LOG.debug(f"Using single-shot upload for input stream")
1229
+ return super().upload(file_path=ctx.target_path, contents=contents, overwrite=ctx.overwrite)
1230
+
1231
+ def _initiate_multipart_upload(self, ctx: _UploadContext) -> dict:
1232
+ """Initiate a multipart upload and return the response."""
773
1233
  query = {"action": "initiate-upload"}
774
- if overwrite is not None:
775
- query["overwrite"] = overwrite
1234
+ if ctx.overwrite is not None:
1235
+ query["overwrite"] = ctx.overwrite
776
1236
 
777
1237
  # Method _api.do() takes care of retrying and will raise an exception in case of failure.
778
1238
  initiate_upload_response = self._api.do(
779
- "POST", f"/api/2.0/fs/files{_escape_multi_segment_path_parameter(file_path)}", query=query
1239
+ "POST", f"/api/2.0/fs/files{_escape_multi_segment_path_parameter(ctx.target_path)}", query=query
780
1240
  )
1241
+ return initiate_upload_response
1242
+
1243
+ def _single_thread_multipart_upload(self, ctx: _UploadContext, contents: BinaryIO) -> None:
1244
+
1245
+ # Upload empty and small files with one-shot upload.
1246
+ pre_read_buffer = contents.read(self._config.files_ext_multipart_upload_min_stream_size)
1247
+ if len(pre_read_buffer) < self._config.files_ext_multipart_upload_min_stream_size:
1248
+ _LOG.debug(
1249
+ f"Using one-shot upload for input stream of size {len(pre_read_buffer)} below {self._config.files_ext_multipart_upload_min_stream_size} bytes"
1250
+ )
1251
+ return self._single_thread_single_shot_upload(ctx, BytesIO(pre_read_buffer))
1252
+
1253
+ # Initiate the multipart upload.
1254
+ initiate_upload_response = self._initiate_multipart_upload(ctx)
781
1255
 
782
1256
  if initiate_upload_response.get("multipart_upload"):
783
1257
  cloud_provider_session = self._create_cloud_provider_session()
@@ -786,37 +1260,451 @@ class FilesExt(files.FilesAPI):
786
1260
  raise ValueError(f"Unexpected server response: {initiate_upload_response}")
787
1261
 
788
1262
  try:
789
- self._perform_multipart_upload(
790
- file_path, contents, session_token, pre_read_buffer, cloud_provider_session
791
- )
1263
+ self._perform_multipart_upload(ctx, contents, session_token, pre_read_buffer, cloud_provider_session)
1264
+ except FallbackToUploadUsingFilesApi as e:
1265
+ try:
1266
+ self._abort_multipart_upload(ctx, session_token, cloud_provider_session)
1267
+ except BaseException as ex:
1268
+ # Ignore abort exceptions as it is a best-effort.
1269
+ _LOG.warning(f"Failed to abort upload: {ex}")
1270
+
1271
+ _LOG.info(f"Falling back to single-shot upload with Files API: {e}")
1272
+ # Concatenate the buffered part and the rest of the stream.
1273
+ full_stream = _ConcatenatedInputStream(BytesIO(e.buffer), contents)
1274
+ return self._single_thread_single_shot_upload(ctx, full_stream)
1275
+
792
1276
  except Exception as e:
793
1277
  _LOG.info(f"Aborting multipart upload on error: {e}")
794
1278
  try:
795
- self._abort_multipart_upload(file_path, session_token, cloud_provider_session)
1279
+ self._abort_multipart_upload(ctx, session_token, cloud_provider_session)
796
1280
  except BaseException as ex:
1281
+ # Ignore abort exceptions as it is a best-effort.
797
1282
  _LOG.warning(f"Failed to abort upload: {ex}")
798
- # ignore, abort is a best-effort
799
1283
  finally:
800
- # rethrow original exception
1284
+ # Rethrow the original exception
801
1285
  raise e from None
802
1286
 
803
1287
  elif initiate_upload_response.get("resumable_upload"):
804
1288
  cloud_provider_session = self._create_cloud_provider_session()
805
1289
  session_token = initiate_upload_response["resumable_upload"]["session_token"]
806
- self._perform_resumable_upload(
807
- file_path, contents, session_token, overwrite, pre_read_buffer, cloud_provider_session
808
- )
1290
+
1291
+ try:
1292
+ self._perform_resumable_upload(ctx, contents, session_token, pre_read_buffer, cloud_provider_session)
1293
+ except FallbackToUploadUsingFilesApi as e:
1294
+ _LOG.info(f"Falling back to single-shot upload with Files API: {e}")
1295
+ # Concatenate the buffered part and the rest of the stream.
1296
+ full_stream = _ConcatenatedInputStream(BytesIO(e.buffer), contents)
1297
+ return self._single_thread_single_shot_upload(ctx, full_stream)
809
1298
  else:
810
1299
  raise ValueError(f"Unexpected server response: {initiate_upload_response}")
811
1300
 
1301
+ def _parallel_upload_from_stream(self, ctx: _UploadContext, contents: BinaryIO) -> None:
1302
+ """
1303
+ Upload a stream using multipart upload with multiple threads.
1304
+ A producer thread reads parts of the stream and consumer threads
1305
+ upload them concurrently using presigned part URLs.
1306
+ """
1307
+ initiate_upload_response = self._initiate_multipart_upload(ctx)
1308
+
1309
+ if initiate_upload_response.get("resumable_upload"):
1310
+ _LOG.warning("GCP does not support parallel resumable uploads, falling back to single-threaded upload")
1311
+ return self._single_thread_multipart_upload(ctx, contents)
1312
+ elif initiate_upload_response.get("multipart_upload"):
1313
+ session_token = initiate_upload_response["multipart_upload"].get("session_token")
1314
+ cloud_provider_session = self._create_cloud_provider_session()
1315
+ if not session_token:
1316
+ raise ValueError(f"Unexpected server response: {initiate_upload_response}")
1317
+ try:
1318
+ self._parallel_multipart_upload_from_stream(ctx, session_token, contents, cloud_provider_session)
1319
+ except FallbackToUploadUsingFilesApi as e:
1320
+ try:
1321
+ self._abort_multipart_upload(ctx, session_token, cloud_provider_session)
1322
+ except Exception as abort_ex:
1323
+ _LOG.warning(f"Failed to abort upload: {abort_ex}")
1324
+ _LOG.info(f"Falling back to single-shot upload with Files API: {e}")
1325
+ # Concatenate the buffered part and the rest of the stream.
1326
+ full_stream = _ConcatenatedInputStream(BytesIO(e.buffer), contents)
1327
+ return self._single_thread_single_shot_upload(ctx, full_stream)
1328
+ except Exception as e:
1329
+ _LOG.info(f"Aborting multipart upload on error: {e}")
1330
+ try:
1331
+ self._abort_multipart_upload(ctx, session_token, cloud_provider_session)
1332
+ except Exception as abort_ex:
1333
+ _LOG.warning(f"Failed to abort upload: {abort_ex}")
1334
+ finally:
1335
+ # Rethrow the original exception.
1336
+ raise e from None
1337
+ else:
1338
+ raise ValueError(f"Unexpected server response: {initiate_upload_response}")
1339
+
1340
+ def _parallel_upload_from_file(
1341
+ self,
1342
+ ctx: _UploadContext,
1343
+ ) -> None:
1344
+ """
1345
+ Upload a file using multipart upload with multiple threads.
1346
+ Worker threads read different parts of the file and upload them
1347
+ concurrently using presigned part URLs.
1348
+ """
1349
+
1350
+ initiate_upload_response = self._initiate_multipart_upload(ctx)
1351
+
1352
+ if initiate_upload_response.get("multipart_upload"):
1353
+ cloud_provider_session = self._create_cloud_provider_session()
1354
+ session_token = initiate_upload_response["multipart_upload"].get("session_token")
1355
+ if not session_token:
1356
+ raise ValueError(f"Unexpected server response: {initiate_upload_response}")
1357
+ try:
1358
+ self._parallel_multipart_upload_from_file(ctx, session_token)
1359
+ except FallbackToUploadUsingFilesApi as e:
1360
+ try:
1361
+ self._abort_multipart_upload(ctx, session_token, cloud_provider_session)
1362
+ except Exception as abort_ex:
1363
+ _LOG.warning(f"Failed to abort upload: {abort_ex}")
1364
+
1365
+ _LOG.info(f"Falling back to single-shot upload with Files API: {e}")
1366
+ # Concatenate the buffered part and the rest of the stream.
1367
+ with open(ctx.source_file_path, "rb") as f:
1368
+ return self._single_thread_single_shot_upload(ctx, f)
1369
+
1370
+ except Exception as e:
1371
+ _LOG.info(f"Aborting multipart upload on error: {e}")
1372
+ try:
1373
+ self._abort_multipart_upload(ctx, session_token, cloud_provider_session)
1374
+ except Exception as abort_ex:
1375
+ _LOG.warning(f"Failed to abort upload: {abort_ex}")
1376
+ finally:
1377
+ # Rethrow the original exception.
1378
+ raise e from None
1379
+
1380
+ elif initiate_upload_response.get("resumable_upload"):
1381
+ _LOG.warning("GCP does not support parallel resumable uploads, falling back to single-threaded upload")
1382
+ with open(ctx.source_file_path, "rb") as f:
1383
+ return self._upload_single_thread_with_known_size(ctx, f)
1384
+ else:
1385
+ raise ValueError(f"Unexpected server response: {initiate_upload_response}")
1386
+
1387
+ @dataclass
1388
+ class _MultipartUploadPart:
1389
+ ctx: FilesExt._UploadContext
1390
+ part_index: int
1391
+ part_offset: int
1392
+ part_size: int
1393
+ session_token: str
1394
+
1395
+ def _parallel_multipart_upload_from_file(
1396
+ self,
1397
+ ctx: _UploadContext,
1398
+ session_token: str,
1399
+ ) -> None:
1400
+ # Calculate the number of parts.
1401
+ file_size = os.path.getsize(ctx.source_file_path)
1402
+ part_size = ctx.part_size
1403
+ num_parts = (file_size + part_size - 1) // part_size
1404
+ _LOG.debug(f"Uploading file of size {file_size} bytes in {num_parts} parts using {ctx.parallelism} threads")
1405
+
1406
+ # Create queues and worker threads.
1407
+ task_queue = Queue()
1408
+ etags_result_queue = Queue()
1409
+ exception_queue = Queue()
1410
+ aborted = Event()
1411
+ workers = [
1412
+ Thread(target=self._upload_file_consumer, args=(task_queue, etags_result_queue, exception_queue, aborted))
1413
+ for _ in range(ctx.parallelism)
1414
+ ]
1415
+ _LOG.debug(f"Starting {len(workers)} worker threads for parallel upload")
1416
+
1417
+ # Enqueue all parts. Since the task queue is populated before starting the workers, we don't need to signal completion.
1418
+ for part_index in range(1, num_parts + 1):
1419
+ part_offset = (part_index - 1) * part_size
1420
+ part_size = min(part_size, file_size - part_offset)
1421
+ part = self._MultipartUploadPart(ctx, part_index, part_offset, part_size, session_token)
1422
+ task_queue.put(part)
1423
+
1424
+ # Start the worker threads for parallel upload.
1425
+ for worker in workers:
1426
+ worker.start()
1427
+
1428
+ # Wait for all tasks to be processed.
1429
+ for worker in workers:
1430
+ worker.join()
1431
+
1432
+ # Check for exceptions: if any worker encountered an exception, raise the first one.
1433
+ if not exception_queue.empty():
1434
+ first_exception = exception_queue.get()
1435
+ raise first_exception
1436
+
1437
+ # Collect results from the etags queue.
1438
+ etags: dict = {}
1439
+ while not etags_result_queue.empty():
1440
+ part_number, etag = etags_result_queue.get()
1441
+ etags[part_number] = etag
1442
+
1443
+ self._complete_multipart_upload(ctx, etags, session_token)
1444
+
1445
+ def _parallel_multipart_upload_from_stream(
1446
+ self,
1447
+ ctx: _UploadContext,
1448
+ session_token: str,
1449
+ content: BinaryIO,
1450
+ cloud_provider_session: requests.Session,
1451
+ ) -> None:
1452
+
1453
+ task_queue = Queue(maxsize=ctx.parallelism) # Limit queue size to control memory usage
1454
+ etags_result_queue = Queue()
1455
+ exception_queue = Queue()
1456
+ all_produced = Event()
1457
+ aborted = Event()
1458
+
1459
+ # Do the first part read ahead
1460
+ pre_read_buffer = content.read(ctx.part_size)
1461
+ if not pre_read_buffer:
1462
+ self._complete_multipart_upload(ctx, {}, session_token)
1463
+ return
1464
+ try:
1465
+ etag = self._do_upload_one_part(
1466
+ ctx, cloud_provider_session, 1, 0, len(pre_read_buffer), session_token, BytesIO(pre_read_buffer)
1467
+ )
1468
+ etags_result_queue.put((1, etag))
1469
+ except FallbackToUploadUsingFilesApi as e:
1470
+ raise FallbackToUploadUsingFilesApi(
1471
+ pre_read_buffer, "Falling back to single-shot upload with Files API"
1472
+ ) from e
1473
+
1474
+ if len(pre_read_buffer) < ctx.part_size:
1475
+ self._complete_multipart_upload(ctx, {1: etag}, session_token)
1476
+ return
1477
+
1478
+ def producer() -> None:
1479
+ part_index = 2
1480
+ part_size = ctx.part_size
1481
+ while not aborted.is_set():
1482
+ part_content = content.read(part_size)
1483
+ if not part_content:
1484
+ break
1485
+ part_offset = (part_index - 1) * part_size
1486
+ part = self._MultipartUploadPart(ctx, part_index, part_offset, len(part_content), session_token)
1487
+ while not aborted.is_set():
1488
+ try:
1489
+ task_queue.put((part, part_content), timeout=0.1)
1490
+ break
1491
+ except Full:
1492
+ continue
1493
+ part_index += 1
1494
+ all_produced.set()
1495
+
1496
+ producer_thread = Thread(target=producer)
1497
+ consumers = [
1498
+ Thread(
1499
+ target=self._upload_stream_consumer,
1500
+ args=(task_queue, etags_result_queue, exception_queue, all_produced, aborted),
1501
+ )
1502
+ for _ in range(ctx.parallelism)
1503
+ ]
1504
+ _LOG.debug(f"Starting {len(consumers)} worker threads for parallel upload")
1505
+ # Start producer and consumer threads
1506
+ producer_thread.start()
1507
+ for consumer in consumers:
1508
+ consumer.start()
1509
+
1510
+ # Wait for producer to finish
1511
+ _LOG.debug(f"threads started, waiting for producer to finish")
1512
+ producer_thread.join()
1513
+ # Wait for all tasks to be processed
1514
+ _LOG.debug(f"producer finished, waiting for consumers to finish")
1515
+ # task_queue.join()
1516
+ for consumer in consumers:
1517
+ consumer.join()
1518
+
1519
+ # Check for exceptions: if any worker encountered an exception, raise the first one.
1520
+ if not exception_queue.empty():
1521
+ first_exception = exception_queue.get()
1522
+ raise first_exception
1523
+
1524
+ # Collect results from the etags queue
1525
+ etags: dict = {}
1526
+ while not etags_result_queue.empty():
1527
+ part_number, etag = etags_result_queue.get()
1528
+ etags[part_number] = etag
1529
+
1530
+ self._complete_multipart_upload(ctx, etags, session_token)
1531
+
1532
+ def _complete_multipart_upload(self, ctx, etags, session_token):
1533
+ query = {"action": "complete-upload", "upload_type": "multipart", "session_token": session_token}
1534
+ headers = {"Content-Type": "application/json"}
1535
+ body: dict = {}
1536
+ parts = []
1537
+ for part_number, etag in sorted(etags.items()):
1538
+ part = {"part_number": part_number, "etag": etag}
1539
+ parts.append(part)
1540
+ body["parts"] = parts
1541
+ self._api.do(
1542
+ "POST",
1543
+ f"/api/2.0/fs/files{_escape_multi_segment_path_parameter(ctx.target_path)}",
1544
+ query=query,
1545
+ headers=headers,
1546
+ body=body,
1547
+ )
1548
+
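For reference, the completion call above ends up posting a body of the following shape (a sketch reconstructed from the code; the etag values are placeholders):

# POST /api/2.0/fs/files{target_path}
#   ?action=complete-upload&upload_type=multipart&session_token=...
body = {
    "parts": [
        {"part_number": 1, "etag": "etag-of-part-1"},
        {"part_number": 2, "etag": "etag-of-part-2"},
        # ...one entry per uploaded part, sorted by part_number
    ]
}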
1549
+ def _upload_file_consumer(
1550
+ self,
1551
+ task_queue: Queue[FilesExt._MultipartUploadPart],
1552
+ etags_queue: Queue[tuple[int, str]],
1553
+ exception_queue: Queue[Exception],
1554
+ aborted: Event,
1555
+ ) -> None:
1556
+ cloud_provider_session = self._create_cloud_provider_session()
1557
+ while not aborted.is_set():
1558
+ try:
1559
+ part = task_queue.get(block=False)
1560
+ except Empty:
1561
+ # The task_queue was populated before the workers were started, so we can exit if it's empty.
1562
+ break
1563
+
1564
+ try:
1565
+ with open(part.ctx.source_file_path, "rb") as f:
1566
+ f.seek(part.part_offset, os.SEEK_SET)
1567
+ part_content = BytesIO(f.read(part.part_size))
1568
+ etag = self._do_upload_one_part(
1569
+ part.ctx,
1570
+ cloud_provider_session,
1571
+ part.part_index,
1572
+ part.part_offset,
1573
+ part.part_size,
1574
+ part.session_token,
1575
+ part_content,
1576
+ )
1577
+ etags_queue.put((part.part_index, etag))
1578
+ except Exception as e:
1579
+ aborted.set()
1580
+ exception_queue.put(e)
1581
+ finally:
1582
+ task_queue.task_done()
1583
+
1584
+ def _upload_stream_consumer(
1585
+ self,
1586
+ task_queue: Queue[tuple[FilesExt._MultipartUploadPart, bytes]],
1587
+ etags_queue: Queue[tuple[int, str]],
1588
+ exception_queue: Queue[Exception],
1589
+ all_produced: Event,
1590
+ aborted: Event,
1591
+ ) -> None:
1592
+ cloud_provider_session = self._create_cloud_provider_session()
1593
+ while not aborted.is_set():
1594
+ try:
1595
+ (part, content) = task_queue.get(block=False, timeout=0.1)
1596
+ except Empty:
1597
+ if all_produced.is_set():
1598
+ break # No more parts will be produced and the queue is empty
1599
+ else:
1600
+ continue
1601
+ try:
1602
+ etag = self._do_upload_one_part(
1603
+ part.ctx,
1604
+ cloud_provider_session,
1605
+ part.part_index,
1606
+ part.part_offset,
1607
+ part.part_size,
1608
+ part.session_token,
1609
+ BytesIO(content),
1610
+ )
1611
+ etags_queue.put((part.part_index, etag))
1612
+ except Exception as e:
1613
+ aborted.set()
1614
+ exception_queue.put(e)
1615
+ finally:
1616
+ task_queue.task_done()
1617
+
1618
+ def _do_upload_one_part(
1619
+ self,
1620
+ ctx: _UploadContext,
1621
+ cloud_provider_session: requests.Session,
1622
+ part_index: int,
1623
+ part_offset: int,
1624
+ part_size: int,
1625
+ session_token: str,
1626
+ part_content: BinaryIO,
1627
+ ) -> str:
1628
+ retry_count = 0
1629
+
1630
+ # Try to upload the part, retrying if the upload URL expires.
1631
+ while True:
1632
+ body: dict = {
1633
+ "path": ctx.target_path,
1634
+ "session_token": session_token,
1635
+ "start_part_number": part_index,
1636
+ "count": 1,
1637
+ "expire_time": self._get_upload_url_expire_time(),
1638
+ }
1639
+
1640
+ headers = {"Content-Type": "application/json"}
1641
+
1642
+ # Requesting URLs for the same set of parts is an idempotent operation and is safe to retry.
1643
+ try:
1644
+ # The _api.do() method handles retries and will raise an exception in case of failure.
1645
+ upload_part_urls_response = self._api.do(
1646
+ "POST", "/api/2.0/fs/create-upload-part-urls", headers=headers, body=body
1647
+ )
1648
+ except PermissionDenied as e:
1649
+ if self._is_presigned_urls_disabled_error(e):
1650
+ raise FallbackToUploadUsingFilesApi(None, "Presigned URLs are disabled")
1651
+ else:
1652
+ raise e from None
1653
+
1654
+ upload_part_urls = upload_part_urls_response.get("upload_part_urls", [])
1655
+ if len(upload_part_urls) == 0:
1656
+ raise ValueError(f"Unexpected server response: {upload_part_urls_response}")
1657
+ upload_part_url = upload_part_urls[0]
1658
+ url = upload_part_url["url"]
1659
+ required_headers = upload_part_url.get("headers", [])
1660
+ assert part_index == upload_part_url["part_number"]
1661
+
1662
+ headers: dict = {"Content-Type": "application/octet-stream"}
1663
+ for h in required_headers:
1664
+ headers[h["name"]] = h["value"]
1665
+
1666
+ _LOG.debug(f"Uploading part {part_index}: [{part_offset}, {part_offset + part_size - 1}]")
1667
+
1668
+ def rewind() -> None:
1669
+ part_content.seek(0, os.SEEK_SET)
1670
+
1671
+ def perform_upload() -> requests.Response:
1672
+ return cloud_provider_session.request(
1673
+ "PUT",
1674
+ url,
1675
+ headers=headers,
1676
+ data=part_content,
1677
+ timeout=self._config.files_ext_network_transfer_inactivity_timeout_seconds,
1678
+ )
1679
+
1680
+ upload_response = self._retry_cloud_idempotent_operation(perform_upload, rewind)
1681
+
1682
+ if upload_response.status_code in (200, 201):
1683
+ etag = upload_response.headers.get("ETag", "")
1684
+ return etag
1685
+ elif FilesExt._is_url_expired_response(upload_response):
1686
+ if retry_count < self._config.files_ext_multipart_upload_max_retries:
1687
+ retry_count += 1
1688
+ _LOG.debug("Upload URL expired, retrying...")
1689
+ continue
1690
+ else:
1691
+ raise ValueError(f"Unsuccessful chunk upload: upload URL expired after {retry_count} retries")
1692
+ elif upload_response.status_code == 403:
1693
+ raise FallbackToUploadUsingFilesApi(None, f"Direct upload forbidden: {upload_response.content}")
1694
+ else:
1695
+ message = f"Unsuccessful chunk upload. Response status: {upload_response.status_code}, body: {upload_response.content}"
1696
+ _LOG.warning(message)
1697
+ mapped_error = _error_mapper(upload_response, {})
1698
+ raise mapped_error or ValueError(message)
1699
+
812
1700
  def _perform_multipart_upload(
813
1701
  self,
814
- target_path: str,
1702
+ ctx: _UploadContext,
815
1703
  input_stream: BinaryIO,
816
1704
  session_token: str,
817
1705
  pre_read_buffer: bytes,
818
1706
  cloud_provider_session: requests.Session,
819
- ):
1707
+ ) -> None:
820
1708
  """
821
1709
  Performs multipart upload using presigned URLs on AWS and Azure:
822
1710
  https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html
@@ -832,7 +1720,7 @@ class FilesExt(files.FilesAPI):
832
1720
  # AWS signed chunked upload: https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-streaming.html
833
1721
  # https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-tune-upload-download-python#buffering-during-uploads
834
1722
 
835
- chunk_offset = 0 # used only for logging
1723
+ chunk_offset = 0
836
1724
 
837
1725
  # This buffer is expected to contain at least multipart_upload_chunk_size bytes.
838
1726
  # Note that initially buffer can be bigger (from pre_read_buffer).
@@ -842,37 +1730,43 @@ class FilesExt(files.FilesAPI):
842
1730
  eof = False
843
1731
  while not eof:
844
1732
  # If needed, buffer the next chunk.
845
- buffer = FilesExt._fill_buffer(buffer, self._config.multipart_upload_chunk_size, input_stream)
1733
+ buffer = FilesExt._fill_buffer(buffer, ctx.part_size, input_stream)
846
1734
  if len(buffer) == 0:
847
1735
  # End of stream, no need to request the next block of upload URLs.
848
1736
  break
849
1737
 
850
1738
  _LOG.debug(
851
- f"Multipart upload: requesting next {self._config.multipart_upload_batch_url_count} upload URLs starting from part {current_part_number}"
1739
+ f"Multipart upload: requesting next {ctx.batch_size} upload URLs starting from part {current_part_number}"
852
1740
  )
853
1741
 
854
1742
  body: dict = {
855
- "path": target_path,
1743
+ "path": ctx.target_path,
856
1744
  "session_token": session_token,
857
1745
  "start_part_number": current_part_number,
858
- "count": self._config.multipart_upload_batch_url_count,
859
- "expire_time": self._get_url_expire_time(),
1746
+ "count": ctx.batch_size,
1747
+ "expire_time": self._get_upload_url_expire_time(),
860
1748
  }
861
1749
 
862
1750
  headers = {"Content-Type": "application/json"}
863
1751
 
864
1752
  # Requesting URLs for the same set of parts is an idempotent operation, safe to retry.
865
- # Method _api.do() takes care of retrying and will raise an exception in case of failure.
866
- upload_part_urls_response = self._api.do(
867
- "POST", "/api/2.0/fs/create-upload-part-urls", headers=headers, body=body
868
- )
1753
+ try:
1754
+ # Method _api.do() takes care of retrying and will raise an exception in case of failure.
1755
+ upload_part_urls_response = self._api.do(
1756
+ "POST", "/api/2.0/fs/create-upload-part-urls", headers=headers, body=body
1757
+ )
1758
+ except PermissionDenied as e:
1759
+ if chunk_offset == 0 and self._is_presigned_urls_disabled_error(e):
1760
+ raise FallbackToUploadUsingFilesApi(buffer, "Presigned URLs are disabled")
1761
+ else:
1762
+ raise e from None
869
1763
 
870
1764
  upload_part_urls = upload_part_urls_response.get("upload_part_urls", [])
871
1765
  if len(upload_part_urls) == 0:
872
1766
  raise ValueError(f"Unexpected server response: {upload_part_urls_response}")
873
1767
 
874
1768
  for upload_part_url in upload_part_urls:
875
- buffer = FilesExt._fill_buffer(buffer, self._config.multipart_upload_chunk_size, input_stream)
1769
+ buffer = FilesExt._fill_buffer(buffer, ctx.part_size, input_stream)
876
1770
  actual_buffer_length = len(buffer)
877
1771
  if actual_buffer_length == 0:
878
1772
  eof = True
@@ -886,7 +1780,7 @@ class FilesExt(files.FilesAPI):
886
1780
  for h in required_headers:
887
1781
  headers[h["name"]] = h["value"]
888
1782
 
889
- actual_chunk_length = min(actual_buffer_length, self._config.multipart_upload_chunk_size)
1783
+ actual_chunk_length = min(actual_buffer_length, ctx.part_size)
890
1784
  _LOG.debug(
891
1785
  f"Uploading part {current_part_number}: [{chunk_offset}, {chunk_offset + actual_chunk_length - 1}]"
892
1786
  )
@@ -902,7 +1796,7 @@ class FilesExt(files.FilesAPI):
902
1796
  url,
903
1797
  headers=headers,
904
1798
  data=chunk,
905
- timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
1799
+ timeout=self._config.files_ext_network_transfer_inactivity_timeout_seconds,
906
1800
  )
907
1801
 
908
1802
  upload_response = self._retry_cloud_idempotent_operation(perform, rewind)
@@ -922,7 +1816,7 @@ class FilesExt(files.FilesAPI):
922
1816
  retry_count = 0
923
1817
 
924
1818
  elif FilesExt._is_url_expired_response(upload_response):
925
- if retry_count < self._config.multipart_upload_max_retries:
1819
+ if retry_count < self._config.files_ext_multipart_upload_max_retries:
926
1820
  retry_count += 1
927
1821
  _LOG.debug("Upload URL expired")
928
1822
  # Preserve the buffer so we'll upload the current part again using next upload URL
@@ -930,6 +1824,13 @@ class FilesExt(files.FilesAPI):
930
1824
  # don't confuse user with unrelated "Permission denied" error.
931
1825
  raise ValueError(f"Unsuccessful chunk upload: upload URL expired")
932
1826
 
1827
+ elif upload_response.status_code == 403 and chunk_offset == 0:
1828
+ # We got 403 failure when uploading the very first chunk (we can't tell if it is Azure for sure yet).
1829
+ # This might happen due to Azure firewall enabled for the customer bucket.
1830
+ # Let's fallback to using Files API which might be allowlisted to upload, passing
1831
+ # currently buffered (but not yet uploaded) part of the stream.
1832
+ raise FallbackToUploadUsingFilesApi(buffer, f"Direct upload forbidden: {upload_response.content}")
1833
+
933
1834
  else:
934
1835
  message = f"Unsuccessful chunk upload. Response status: {upload_response.status_code}, body: {upload_response.content}"
935
1836
  _LOG.warning(message)
@@ -938,9 +1839,7 @@ class FilesExt(files.FilesAPI):
938
1839
 
939
1840
  current_part_number += 1
940
1841
 
941
- _LOG.debug(
942
- f"Completing multipart upload after uploading {len(etags)} parts of up to {self._config.multipart_upload_chunk_size} bytes"
943
- )
1842
+ _LOG.debug(f"Completing multipart upload after uploading {len(etags)} parts of up to {ctx.part_size} bytes")
944
1843
 
945
1844
  query = {"action": "complete-upload", "upload_type": "multipart", "session_token": session_token}
946
1845
  headers = {"Content-Type": "application/json"}
@@ -957,14 +1856,14 @@ class FilesExt(files.FilesAPI):
957
1856
  # Method _api.do() takes care of retrying and will raise an exception in case of failure.
958
1857
  self._api.do(
959
1858
  "POST",
960
- f"/api/2.0/fs/files{_escape_multi_segment_path_parameter(target_path)}",
1859
+ f"/api/2.0/fs/files{_escape_multi_segment_path_parameter(ctx.target_path)}",
961
1860
  query=query,
962
1861
  headers=headers,
963
1862
  body=body,
964
1863
  )
965
1864
 
966
1865
  @staticmethod
967
- def _fill_buffer(buffer: bytes, desired_min_size: int, input_stream: BinaryIO):
1866
+ def _fill_buffer(buffer: bytes, desired_min_size: int, input_stream: BinaryIO) -> bytes:
968
1867
  """
969
1868
  Tries to fill given buffer to contain at least `desired_min_size` bytes by reading from input stream.
970
1869
  """
@@ -978,7 +1877,7 @@ class FilesExt(files.FilesAPI):
978
1877
  return buffer
979
1878
 
980
1879
  @staticmethod
981
- def _is_url_expired_response(response: requests.Response):
1880
+ def _is_url_expired_response(response: requests.Response) -> bool:
982
1881
  """
983
1882
  Checks if response matches one of the known "URL expired" responses from the cloud storage providers.
984
1883
  """
@@ -1011,15 +1910,21 @@ class FilesExt(files.FilesAPI):
1011
1910
 
1012
1911
  return False
1013
1912
 
1913
+ def _is_presigned_urls_disabled_error(self, e: PermissionDenied) -> bool:
1914
+ error_infos = e.get_error_info()
1915
+ for error_info in error_infos:
1916
+ if error_info.reason == "FILES_API_API_IS_NOT_ENABLED":
1917
+ return True
1918
+ return False
1919
+
1014
1920
  def _perform_resumable_upload(
1015
1921
  self,
1016
- target_path: str,
1922
+ ctx: _UploadContext,
1017
1923
  input_stream: BinaryIO,
1018
1924
  session_token: str,
1019
- overwrite: bool,
1020
1925
  pre_read_buffer: bytes,
1021
1926
  cloud_provider_session: requests.Session,
1022
- ):
1927
+ ) -> None:
1023
1928
  """
1024
1929
  Performs resumable upload on GCP: https://cloud.google.com/storage/docs/performing-resumable-uploads
1025
1930
  """
@@ -1047,14 +1952,20 @@ class FilesExt(files.FilesAPI):
  # On the contrary, in multipart upload we can decide to complete upload *after*
  # last chunk has been sent.

- body: dict = {"path": target_path, "session_token": session_token}
+ body: dict = {"path": ctx.target_path, "session_token": session_token}

  headers = {"Content-Type": "application/json"}

- # Method _api.do() takes care of retrying and will raise an exception in case of failure.
- resumable_upload_url_response = self._api.do(
- "POST", "/api/2.0/fs/create-resumable-upload-url", headers=headers, body=body
- )
+ try:
+ # Method _api.do() takes care of retrying and will raise an exception in case of failure.
+ resumable_upload_url_response = self._api.do(
+ "POST", "/api/2.0/fs/create-resumable-upload-url", headers=headers, body=body
+ )
+ except PermissionDenied as e:
+ if self._is_presigned_urls_disabled_error(e):
+ raise FallbackToUploadUsingFilesApi(pre_read_buffer, "Presigned URLs are disabled")
+ else:
+ raise e from None

  resumable_upload_url_node = resumable_upload_url_response.get("resumable_upload_url")
  if not resumable_upload_url_node:
@@ -1069,7 +1980,7 @@ class FilesExt(files.FilesAPI):
  try:
  # We will buffer this many bytes: one chunk + read-ahead block.
  # Note buffer may contain more data initially (from pre_read_buffer).
- min_buffer_size = self._config.multipart_upload_chunk_size + self._multipart_upload_read_ahead_bytes
+ min_buffer_size = ctx.part_size + self._multipart_upload_read_ahead_bytes

  buffer = pre_read_buffer

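The read-ahead block exists so the uploader can tell, before sending a chunk, whether it is the last one (the final chunk of a GCP resumable upload must state the total size). A small, self-contained sketch of that decision:

from io import BytesIO


def split_chunks(stream: BytesIO, part_size: int, read_ahead: int) -> list:
    chunks = []
    buffer = b""
    while True:
        # Keep at least one part plus the read-ahead block in memory.
        buffer += stream.read(part_size + read_ahead - len(buffer))
        if len(buffer) <= part_size:
            chunks.append((buffer, True))   # last chunk: total size is now known
            return chunks
        chunks.append((buffer[:part_size], False))  # more data follows
        buffer = buffer[part_size:]


parts = split_chunks(BytesIO(b"a" * 10), part_size=4, read_ahead=1)
assert [(len(c), last) for c, last in parts] == [(4, False), (4, False), (2, True)]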
@@ -1094,7 +2005,7 @@ class FilesExt(files.FilesAPI):
  file_size = chunk_offset + actual_chunk_length
  else:
  # More chunks expected, let's upload current chunk (excluding read-ahead block).
- actual_chunk_length = self._config.multipart_upload_chunk_size
+ actual_chunk_length = ctx.part_size
  file_size = "*"

  headers: dict = {"Content-Type": "application/octet-stream"}
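For reference, GCP resumable uploads communicate progress through the Content-Range header: intermediate chunks report an unknown total ("*"), while the final chunk states the full object size. A sketch of building that header value:

def content_range_header(chunk_offset: int, chunk_length: int, total_size: "int | str") -> str:
    # total_size is the real size for the final chunk, or "*" while it is still unknown.
    if chunk_length == 0:
        return f"bytes */{total_size}"  # status probe / empty object
    last_byte = chunk_offset + chunk_length - 1
    return f"bytes {chunk_offset}-{last_byte}/{total_size}"


assert content_range_header(0, 1024, "*") == "bytes 0-1023/*"
assert content_range_header(1024, 512, 1536) == "bytes 1024-1535/1536"
assert content_range_header(0, 0, "*") == "bytes */*"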
@@ -1113,7 +2024,7 @@ class FilesExt(files.FilesAPI):
  resumable_upload_url,
  headers={"Content-Range": "bytes */*"},
  data=b"",
- timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
+ timeout=self._config.files_ext_network_transfer_inactivity_timeout_seconds,
  )

  try:
@@ -1128,7 +2039,7 @@ class FilesExt(files.FilesAPI):
  resumable_upload_url,
  headers=headers,
  data=BytesIO(buffer[:actual_chunk_length]),
- timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
+ timeout=self._config.files_ext_network_transfer_inactivity_timeout_seconds,
  )

  # https://cloud.google.com/storage/docs/performing-resumable-uploads#resume-upload
@@ -1136,9 +2047,8 @@ class FilesExt(files.FilesAPI):
  # a 503 or 500 response, then you need to resume the interrupted upload from where it left off.

  # Let's follow that for all potentially retryable status codes.
- # Together with the catch block below we replicate the logic in _retry_databricks_idempotent_operation().
  if upload_response.status_code in self._RETRYABLE_STATUS_CODES:
- if retry_count < self._config.multipart_upload_max_retries:
+ if retry_count < self._config.files_ext_multipart_upload_max_retries:
  retry_count += 1
  # let original upload_response be handled as an error
  upload_response = retrieve_upload_status() or upload_response
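The retry path above does not blindly resend the chunk: it first asks the server how much it already received. A schematic of that probe-then-retry loop, detached from any real HTTP calls:

from typing import Callable, Optional


def upload_with_status_probe(
    send_chunk: Callable[[], Optional[int]],
    probe_committed_offset: Callable[[], Optional[int]],
    max_retries: int,
) -> int:
    # send_chunk returns the committed offset on success, or None on a retryable failure.
    retries = 0
    while True:
        committed = send_chunk()
        if committed is not None:
            return committed
        if retries >= max_retries:
            raise RuntimeError("upload failed after retries")
        retries += 1
        probed = probe_committed_offset()
        if probed is not None:
            # The server did receive the chunk even though the request appeared to fail.
            return probed


attempts = iter([None, 100])
assert upload_with_status_probe(lambda: next(attempts), lambda: None, max_retries=2) == 100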
@@ -1148,7 +2058,10 @@ class FilesExt(files.FilesAPI):

  except RequestException as e:
  # Let's do the same for retryable network errors.
- if _BaseClient._is_retryable(e) and retry_count < self._config.multipart_upload_max_retries:
+ if (
+ _BaseClient._is_retryable(e)
+ and retry_count < self._config.files_ext_multipart_upload_max_retries
+ ):
  retry_count += 1
  upload_response = retrieve_upload_status()
  if not upload_response:
@@ -1194,7 +2107,7 @@ class FilesExt(files.FilesAPI):
  uploaded_bytes_count = next_chunk_offset - chunk_offset
  chunk_offset = next_chunk_offset

- elif upload_response.status_code == 412 and not overwrite:
+ elif upload_response.status_code == 412 and not ctx.overwrite:
  # Assuming this is only possible reason
  # Full message in this case: "At least one of the pre-conditions you specified did not hold."
  raise AlreadyExists("The file being created already exists.")
@@ -1227,19 +2140,38 @@ class FilesExt(files.FilesAPI):
  else:
  raise ValueError(f"Cannot parse response header: Range: {range_string}")

- def _get_url_expire_time(self):
- """Generates expiration time and save it in the required format."""
- current_time = datetime.datetime.now(datetime.timezone.utc)
- expire_time = current_time + self._config.multipart_upload_url_expiration_duration
+ def _get_rfc339_timestamp_with_future_offset(self, base_time: datetime.datetime, offset: timedelta) -> str:
+ """Generates an offset timestamp in an RFC3339 format suitable for URL generation"""
+ offset_timestamp = base_time + offset
  # From Google Protobuf doc:
  # In JSON format, the Timestamp type is encoded as a string in the
  # * [RFC 3339](https://www.ietf.org/rfc/rfc3339.txt) format. That is, the
  # * format is "{year}-{month}-{day}T{hour}:{min}:{sec}[.{frac_sec}]Z"
- return expire_time.strftime("%Y-%m-%dT%H:%M:%SZ")
+ return offset_timestamp.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+ def _get_upload_url_expire_time(self) -> str:
+ """Generates expiration time in the required format."""
+ current_time = datetime.datetime.now(datetime.timezone.utc)
+ return self._get_rfc339_timestamp_with_future_offset(
+ current_time, self._config.files_ext_multipart_upload_url_expiration_duration
+ )
+
+ def _get_download_url_expire_time(self) -> str:
+ """Generates expiration time in the required format."""
+ current_time = datetime.datetime.now(datetime.timezone.utc)
+ return self._get_rfc339_timestamp_with_future_offset(
+ current_time, self._config.files_ext_presigned_download_url_expiration_duration
+ )

- def _abort_multipart_upload(self, target_path: str, session_token: str, cloud_provider_session: requests.Session):
+ def _abort_multipart_upload(
+ self, ctx: _UploadContext, session_token: str, cloud_provider_session: requests.Session
+ ) -> None:
  """Aborts ongoing multipart upload session to clean up incomplete file."""
- body: dict = {"path": target_path, "session_token": session_token, "expire_time": self._get_url_expire_time()}
+ body: dict = {
+ "path": ctx.target_path,
+ "session_token": session_token,
+ "expire_time": self._get_upload_url_expire_time(),
+ }

  headers = {"Content-Type": "application/json"}

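For illustration, producing an RFC 3339 timestamp a fixed duration in the future is a thin wrapper around strftime:

import datetime


def rfc3339_in(offset: datetime.timedelta) -> str:
    # RFC 3339 / protobuf JSON form: "{year}-{month}-{day}T{hour}:{min}:{sec}Z"
    expire_at = datetime.datetime.now(datetime.timezone.utc) + offset
    return expire_at.strftime("%Y-%m-%dT%H:%M:%SZ")


stamp = rfc3339_in(datetime.timedelta(hours=1))
assert stamp.endswith("Z") and "T" in stamp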
@@ -1254,13 +2186,13 @@ class FilesExt(files.FilesAPI):
  for h in required_headers:
  headers[h["name"]] = h["value"]

- def perform():
+ def perform() -> requests.Response:
  return cloud_provider_session.request(
  "DELETE",
  abort_url,
  headers=headers,
  data=b"",
- timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
+ timeout=self._config.files_ext_network_transfer_inactivity_timeout_seconds,
  )

  abort_response = self._retry_cloud_idempotent_operation(perform)
@@ -1270,19 +2202,19 @@ class FilesExt(files.FilesAPI):

  def _abort_resumable_upload(
  self, resumable_upload_url: str, required_headers: list, cloud_provider_session: requests.Session
- ):
+ ) -> None:
  """Aborts ongoing resumable upload session to clean up incomplete file."""
  headers: dict = {}
  for h in required_headers:
  headers[h["name"]] = h["value"]

- def perform():
+ def perform() -> requests.Response:
  return cloud_provider_session.request(
  "DELETE",
  resumable_upload_url,
  headers=headers,
  data=b"",
- timeout=self._config.multipart_upload_single_chunk_upload_timeout_seconds,
+ timeout=self._config.files_ext_network_transfer_inactivity_timeout_seconds,
  )

  abort_response = self._retry_cloud_idempotent_operation(perform)
@@ -1290,7 +2222,7 @@ class FilesExt(files.FilesAPI):
  if abort_response.status_code not in (200, 201):
  raise ValueError(abort_response)

- def _create_cloud_provider_session(self):
+ def _create_cloud_provider_session(self) -> requests.Session:
  """Creates a separate session which does not inherit auth headers from BaseClient session."""
  session = requests.Session()

@@ -1304,7 +2236,7 @@ class FilesExt(files.FilesAPI):
  return session

  def _retry_cloud_idempotent_operation(
- self, operation: Callable[[], requests.Response], before_retry: Callable = None
+ self, operation: Callable[[], requests.Response], before_retry: Optional[Callable] = None
  ) -> requests.Response:
  """Perform given idempotent operation with necessary retries for requests to non Databricks APIs.
  For cloud APIs, we will retry on network errors and on server response codes.
@@ -1337,10 +2269,14 @@ class FilesExt(files.FilesAPI):
  # where we believe request didn't reach the server
  is_retryable=extended_is_retryable,
  before_retry=before_retry,
+ clock=self._config.clock,
  )(delegate)()

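Passing the configured clock into the retry helper makes backoff sleeps injectable (and instantaneous in tests). A generic sketch of a clock-parameterized retry wrapper; this is not the SDK's own retried helper:

import time
from typing import Callable, TypeVar

T = TypeVar("T")


class Clock:
    def sleep(self, seconds: float) -> None:
        time.sleep(seconds)


def with_retries(fn: Callable[[], T], attempts: int, clock: Clock, delay: float = 0.1) -> T:
    for attempt in range(attempts):
        try:
            return fn()
        except ConnectionError:
            if attempt == attempts - 1:
                raise
            clock.sleep(delay * (2 ** attempt))  # exponential backoff via the injected clock
    raise AssertionError("unreachable")


class FakeClock(Clock):
    def __init__(self) -> None:
        self.slept = 0.0

    def sleep(self, seconds: float) -> None:
        self.slept += seconds  # no real waiting in tests


calls = iter([ConnectionError(), 42])


def flaky() -> int:
    result = next(calls)
    if isinstance(result, Exception):
        raise result
    return result


fake = FakeClock()
assert with_retries(flaky, attempts=3, clock=fake) == 42
assert fake.slept == 0.1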
  def _open_download_stream(
- self, file_path: str, start_byte_offset: int, if_unmodified_since_timestamp: Optional[str] = None
+ self,
+ file_path: str,
+ start_byte_offset: int,
+ if_unmodified_since_timestamp: Optional[str] = None,
  ) -> DownloadResponse:
  """Opens a download stream from given offset, performing necessary retries."""
  headers = {
@@ -1350,7 +2286,7 @@ class FilesExt(files.FilesAPI):
  if start_byte_offset and not if_unmodified_since_timestamp:
  raise Exception("if_unmodified_since_timestamp is required if start_byte_offset is specified")

- if start_byte_offset:
+ if start_byte_offset > 0:
  headers["Range"] = f"bytes={start_byte_offset}-"

  if if_unmodified_since_timestamp:
@@ -1361,6 +2297,23 @@ class FilesExt(files.FilesAPI):
  "content-type",
  "last-modified",
  ]
+
+ result = self._init_download_response_mode_csp_with_fallback(file_path, headers, response_headers)
+
+ if not isinstance(result.contents, _StreamingResponse):
+ raise Exception(
+ "Internal error: response contents is of unexpected type: " + type(result.contents).__name__
+ )
+
+ return result
+
+ def _init_download_response_files_api(
+ self, file_path: str, headers: dict[str, str], response_headers: list[str]
+ ) -> DownloadResponse:
+ """
+ Initiates a download response using the Files API.
+ """
+
  # Method _api.do() takes care of retrying and will raise an exception in case of failure.
  res = self._api.do(
  "GET",
@@ -1369,22 +2322,119 @@ class FilesExt(files.FilesAPI):
  response_headers=response_headers,
  raw=True,
  )
+ return DownloadResponse.from_dict(res)

- result = DownloadResponse.from_dict(res)
- if not isinstance(result.contents, _StreamingResponse):
- raise Exception(
- "Internal error: response contents is of unexpected type: " + type(result.contents).__name__
+ def _create_download_url(self, file_path: str) -> CreateDownloadUrlResponse:
+ """
+ Creates a presigned download URL using the CSP presigned URL API.
+
+ Wrapped in similar retry logic to the internal API.do call:
+ 1. Call _.api.do to obtain the presigned URL
+ 2. Return the presigned URL
+ """
+
+ # Method _api.do() takes care of retrying and will raise an exception in case of failure.
+ try:
+ raw_response = self._api.do(
+ "POST",
+ f"/api/2.0/fs/create-download-url",
+ query={
+ "path": file_path,
+ "expire_time": self._get_download_url_expire_time(),
+ },
  )

- return result
+ return CreateDownloadUrlResponse.from_dict(raw_response)
+ except PermissionDenied as e:
+ if self._is_presigned_urls_disabled_error(e):
+ raise FallbackToDownloadUsingFilesApi(f"Presigned URLs are disabled")
+ else:
+ raise e from None
+
+ def _init_download_response_presigned_api(self, file_path: str, added_headers: dict[str, str]) -> DownloadResponse:
+ """
+ Initiates a download response using the CSP presigned URL API.
+
+ Wrapped in similar retry logic to the internal API.do call:
+ 1. Call _.api.do to obtain the presigned URL
+ 2. Attempt to establish a streaming connection via the presigned URL
+ 3. Construct a StreamingResponse from the presigned URL
+ """
+
+ url_and_headers = self._create_download_url(file_path)
+ cloud_provider_session = self._create_cloud_provider_session()
+
+ header_overlap = added_headers.keys() & url_and_headers.headers.keys()
+ if header_overlap:
+ raise ValueError(
+ f"Provided headers overlap with required headers from the CSP API bundle: {header_overlap}"
+ )
+
+ merged_headers = {**added_headers, **url_and_headers.headers}
+
+ def perform() -> requests.Response:
+ return cloud_provider_session.request(
+ "GET",
+ url_and_headers.url,
+ headers=merged_headers,
+ timeout=self._config.files_ext_network_transfer_inactivity_timeout_seconds,
+ stream=True,
+ )
+
+ csp_response: _RawResponse = self._retry_cloud_idempotent_operation(perform)

- def _wrap_stream(self, file_path: str, download_response: DownloadResponse):
+ # Mapping the error if the response is not successful.
+ if csp_response.status_code in (200, 201, 206):
+ resp = DownloadResponse(
+ content_length=int(csp_response.headers.get("content-length")),
+ content_type=csp_response.headers.get("content-type"),
+ last_modified=csp_response.headers.get("last-modified"),
+ contents=_StreamingResponse(csp_response, self._config.files_ext_client_download_streaming_chunk_size),
+ )
+ return resp
+ elif csp_response.status_code == 403:
+ # We got 403 failure when downloading the file. This might happen due to Azure firewall enabled for the customer bucket.
+ # Let's fallback to using Files API which might be allowlisted to download.
+ raise FallbackToDownloadUsingFilesApi(f"Direct download forbidden: {csp_response.content}")
+ else:
+ message = (
+ f"Unsuccessful download. Response status: {csp_response.status_code}, body: {csp_response.content}"
+ )
+ _LOG.warning(message)
+ mapped_error = _error_mapper(csp_response, {})
+ raise mapped_error or ValueError(message)
+
+ def _init_download_response_mode_csp_with_fallback(
+ self, file_path: str, headers: dict[str, str], response_headers: list[str]
+ ) -> DownloadResponse:
+ """
+ Initiates a download response using the CSP presigned URL API or the Files API, depending on the configuration.
+ If the CSP presigned download API is enabled, it will attempt to use that first.
+ If the CSP API call fails, it will fall back to the Files API.
+ If the CSP presigned download API is disabled, it will use the Files API directly.
+ """
+
+ try:
+ _LOG.debug(f"Attempting download of {file_path} via CSP APIs")
+ return self._init_download_response_presigned_api(file_path, headers)
+ except FallbackToDownloadUsingFilesApi as e:
+ _LOG.info(f"Falling back to download via Files API: {e}")
+ _LOG.debug(f"Attempt via CSP APIs for {file_path} failed. Falling back to download via Files API")
+ ret = self._init_download_response_files_api(file_path, headers, response_headers)
+ return ret
+
+ def _wrap_stream(
+ self,
+ file_path: str,
+ download_response: DownloadResponse,
+ start_byte_offset: int = 0,
+ ) -> "_ResilientResponse":
  underlying_response = _ResilientIterator._extract_raw_response(download_response)
  return _ResilientResponse(
  self,
  file_path,
  download_response.last_modified,
- offset=0,
+ offset=start_byte_offset,
  underlying_response=underlying_response,
  )

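Taken together, the download path above is a two-tier strategy: try the presigned URL first, and fall back to the Files API when the dedicated signal exception is raised. A compressed sketch of that chain with stand-in functions:

class FallbackToFilesApiDownload(Exception):
    pass


def download_via_presigned_url(path: str) -> bytes:
    # Stand-in: pretend presigned downloads are disabled or blocked by a firewall.
    raise FallbackToFilesApiDownload("Presigned URLs are disabled")


def download_via_files_api(path: str) -> bytes:
    return b"contents of " + path.encode()  # stand-in for streaming through the Files API


def download(path: str) -> bytes:
    try:
        return download_via_presigned_url(path)
    except FallbackToFilesApiDownload:
        return download_via_files_api(path)


assert download("/Volumes/cat/sch/vol/x.bin") == b"contents of /Volumes/cat/sch/vol/x.bin"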
@@ -1398,29 +2448,24 @@ class _ResilientResponse(_RawResponse):
  file_last_modified: str,
  offset: int,
  underlying_response: _RawResponse,
- ):
+ ) -> None:
  self.api = api
  self.file_path = file_path
  self.underlying_response = underlying_response
  self.offset = offset
  self.file_last_modified = file_last_modified

- def iter_content(self, chunk_size=1, decode_unicode=False):
+ def iter_content(self, chunk_size: int = 1, decode_unicode: bool = False) -> Iterator[bytes]:
  if decode_unicode:
  raise ValueError("Decode unicode is not supported")

  iterator = self.underlying_response.iter_content(chunk_size=chunk_size, decode_unicode=False)
  self.iterator = _ResilientIterator(
- iterator,
- self.file_path,
- self.file_last_modified,
- self.offset,
- self.api,
- chunk_size,
+ iterator, self.file_path, self.file_last_modified, self.offset, self.api, chunk_size
  )
  return self.iterator

- def close(self):
+ def close(self) -> None:
  self.iterator.close()


@@ -1432,18 +2477,18 @@ class _ResilientIterator(Iterator):
  def _extract_raw_response(
  download_response: DownloadResponse,
  ) -> _RawResponse:
- streaming_response: _StreamingResponse = download_response.contents # this is an instance of _StreamingResponse
+ streaming_response: _StreamingResponse = download_response.contents
  return streaming_response._response

  def __init__(
  self,
- underlying_iterator,
+ underlying_iterator: Iterator[bytes],
  file_path: str,
  file_last_modified: str,
  offset: int,
  api: FilesExt,
  chunk_size: int,
- ):
+ ) -> None:
  self._underlying_iterator = underlying_iterator
  self._api = api
  self._file_path = file_path
@@ -1459,13 +2504,13 @@ class _ResilientIterator(Iterator):
  self._closed: bool = False

  def _should_recover(self) -> bool:
- if self._total_recovers_count == self._api._config.files_api_client_download_max_total_recovers:
+ if self._total_recovers_count == self._api._config.files_ext_client_download_max_total_recovers:
  _LOG.debug("Total recovers limit exceeded")
  return False
  if (
- self._api._config.files_api_client_download_max_total_recovers_without_progressing is not None
+ self._api._config.files_ext_client_download_max_total_recovers_without_progressing is not None
  and self._recovers_without_progressing_count
- >= self._api._config.files_api_client_download_max_total_recovers_without_progressing
+ >= self._api._config.files_ext_client_download_max_total_recovers_without_progressing
  ):
  _LOG.debug("No progression recovers limit exceeded")
  return False
@@ -1481,7 +2526,7 @@ class _ResilientIterator(Iterator):
  try:
  self._underlying_iterator.close()

- _LOG.debug("Trying to recover from offset " + str(self._offset))
+ _LOG.debug(f"Trying to recover from offset {self._offset}")

  # following call includes all the required network retries
  downloadResponse = self._api._open_download_stream(self._file_path, self._offset, self._file_last_modified)
@@ -1494,7 +2539,7 @@ class _ResilientIterator(Iterator):
  except:
  return False # recover failed, rethrow original exception

- def __next__(self):
+ def __next__(self) -> bytes:
  if self._closed:
  # following _BaseClient
  raise ValueError("I/O operation on closed file")
@@ -1514,6 +2559,6 @@ class _ResilientIterator(Iterator):
  if not self._recover():
  raise

- def close(self):
+ def close(self) -> None:
  self._underlying_iterator.close()
  self._closed = True
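The `_ResilientIterator` above tracks how many bytes it has already handed to the caller so that a dropped connection can be reopened from exactly that offset. A toy version of the same recovery idea over an in-memory source:

from typing import Callable, Iterator


def resilient_chunks(open_at: Callable[[int], Iterator[bytes]], max_recovers: int) -> Iterator[bytes]:
    """Yield chunks; if the underlying iterator fails, reopen it from the delivered offset."""
    offset = 0
    recovers = 0
    stream = open_at(offset)
    while True:
        try:
            chunk = next(stream)
        except StopIteration:
            return
        except Exception:
            if recovers >= max_recovers:
                raise
            recovers += 1
            stream = open_at(offset)  # recover: reopen from where the caller last got data
            continue
        offset += len(chunk)
        yield chunk


data = b"abcdefghij"
failures = {4}  # fail once when asked to continue from offset 4


def open_at(offset: int) -> Iterator[bytes]:
    pos = offset
    while pos < len(data):
        if pos in failures:
            failures.remove(pos)
            raise ConnectionError("stream dropped")
        yield data[pos : pos + 2]
        pos += 2


assert b"".join(resilient_chunks(open_at, max_recovers=1)) == data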