data-sourcerer 0.7.0__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/PKG-INFO +1 -1
  2. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/pyproject.toml +0 -3
  3. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/__init__.py +1 -1
  4. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/storage_provider/services.py +12 -4
  5. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage_provider/services/azure.py +135 -18
  6. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage_provider/services/gcp.py +133 -6
  7. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage_provider/services/s3.py +104 -5
  8. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/widgets/storage_list_sidebar.py +67 -23
  9. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storage_action_progress/main.py +36 -16
  10. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/settings.py +3 -0
  11. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/.gitignore +0 -0
  12. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/LICENSE +0 -0
  13. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/README.md +0 -0
  14. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/__init__.py +0 -0
  15. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/access_credentials/__init__.py +0 -0
  16. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/access_credentials/entities.py +0 -0
  17. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/access_credentials/exceptions.py +0 -0
  18. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/access_credentials/repositories.py +0 -0
  19. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/access_credentials/services.py +0 -0
  20. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/file_system/__init__.py +0 -0
  21. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/file_system/entities.py +0 -0
  22. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/file_system/exceptions.py +0 -0
  23. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/file_system/services.py +0 -0
  24. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/package_meta/__init__.py +0 -0
  25. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/package_meta/entities.py +0 -0
  26. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/package_meta/services.py +0 -0
  27. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/settings/__init__.py +0 -0
  28. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/settings/entities.py +0 -0
  29. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/settings/repositories.py +0 -0
  30. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/settings/services.py +0 -0
  31. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/shared/__init__.py +0 -0
  32. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/shared/entities.py +0 -0
  33. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/storage/__init__.py +0 -0
  34. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/storage/entities.py +0 -0
  35. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/storage/repositories.py +0 -0
  36. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/storage_provider/__init__.py +0 -0
  37. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/storage_provider/entities.py +0 -0
  38. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/domain/storage_provider/exceptions.py +0 -0
  39. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/__init__.py +0 -0
  40. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/access_credentials/__init__.py +0 -0
  41. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/access_credentials/exceptions.py +0 -0
  42. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/access_credentials/registry.py +0 -0
  43. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/access_credentials/repositories.py +0 -0
  44. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/access_credentials/services.py +0 -0
  45. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/db/__init__.py +0 -0
  46. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/db/config.py +0 -0
  47. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/db/models.py +0 -0
  48. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/file_system/__init__.py +0 -0
  49. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/file_system/exceptions.py +0 -0
  50. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/file_system/services.py +0 -0
  51. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/package_meta/__init__.py +0 -0
  52. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/package_meta/services.py +0 -0
  53. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/settings/__init__.py +0 -0
  54. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/settings/repositories.py +0 -0
  55. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/settings/services.py +0 -0
  56. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage/__init__.py +0 -0
  57. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage/repositories.py +0 -0
  58. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage/services.py +0 -0
  59. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage_provider/__init__.py +0 -0
  60. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage_provider/exceptions.py +0 -0
  61. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage_provider/registry.py +0 -0
  62. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/storage_provider/services/__init__.py +0 -0
  63. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/infrastructure/utils.py +0 -0
  64. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/__init__.py +0 -0
  65. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/app.py +0 -0
  66. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/di_container.py +0 -0
  67. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/__init__.py +0 -0
  68. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/about/__init__.py +0 -0
  69. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/about/main.py +0 -0
  70. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/about/styles.tcss +0 -0
  71. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/critical_error/__init__.py +0 -0
  72. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/critical_error/main.py +0 -0
  73. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/critical_error/styles.tcss +0 -0
  74. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/file_system_finder/__init__.py +0 -0
  75. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/file_system_finder/main.py +0 -0
  76. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/file_system_finder/styles.tcss +0 -0
  77. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/file_system_finder/widgets/__init__.py +0 -0
  78. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/file_system_finder/widgets/file_system_navigator.py +0 -0
  79. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/__init__.py +0 -0
  80. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/main.py +0 -0
  81. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/messages/__init__.py +0 -0
  82. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/messages/delete_request.py +0 -0
  83. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/messages/download_request.py +0 -0
  84. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/messages/preview_request.py +0 -0
  85. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/messages/refresh_storages_list_request.py +0 -0
  86. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/messages/resizing_rule.py +0 -0
  87. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/messages/select_storage_item.py +0 -0
  88. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/messages/uncheck_files_request.py +0 -0
  89. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/messages/upload_request.py +0 -0
  90. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/mixins/__init__.py +0 -0
  91. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/mixins/resize_containers_watcher_mixin.py +0 -0
  92. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/styles.tcss +0 -0
  93. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/widgets/__init__.py +0 -0
  94. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/widgets/gradient.py +0 -0
  95. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/widgets/resizing_rule.py +0 -0
  96. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/main/widgets/storage_content.py +0 -0
  97. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/preview_content/__init__.py +0 -0
  98. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/preview_content/main.py +0 -0
  99. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/preview_content/styles.tcss +0 -0
  100. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/preview_content/text_area_style.py +0 -0
  101. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/provider_creds_list/__init__.py +0 -0
  102. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/provider_creds_list/main.py +0 -0
  103. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/provider_creds_list/messages/__init__.py +0 -0
  104. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/provider_creds_list/messages/reload_credentials_request.py +0 -0
  105. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/provider_creds_list/styles.tcss +0 -0
  106. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/provider_creds_registration/__init__.py +0 -0
  107. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/provider_creds_registration/main.py +0 -0
  108. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/provider_creds_registration/styles.tcss +0 -0
  109. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/question/__init__.py +0 -0
  110. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/question/main.py +0 -0
  111. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/question/styles.tcss +0 -0
  112. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/settings/__init__.py +0 -0
  113. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/settings/main.py +0 -0
  114. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/settings/styles.tcss +0 -0
  115. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/shared/__init__.py +0 -0
  116. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/shared/containers.py +0 -0
  117. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/shared/modal_screens.py +0 -0
  118. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/shared/widgets/__init__.py +0 -0
  119. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/shared/widgets/button.py +0 -0
  120. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/shared/widgets/labeled_input.py +0 -0
  121. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/shared/widgets/spinner.py +0 -0
  122. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storage_action_progress/__init__.py +0 -0
  123. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storage_action_progress/styles.tcss +0 -0
  124. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storages_list/__init__.py +0 -0
  125. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storages_list/main.py +0 -0
  126. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storages_list/messages/__init__.py +0 -0
  127. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storages_list/messages/reload_storages_request.py +0 -0
  128. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storages_list/styles.tcss +0 -0
  129. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storages_registration/__init__.py +0 -0
  130. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storages_registration/main.py +0 -0
  131. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/screens/storages_registration/styles.tcss +0 -0
  132. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/settings.py +0 -0
  133. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/themes/__init__.py +0 -0
  134. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/themes/github_dark.py +0 -0
  135. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/presentation/utils.py +0 -0
  136. {data_sourcerer-0.7.0 → data_sourcerer-0.7.2}/sourcerer/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-sourcerer
3
- Version: 0.7.0
3
+ Version: 0.7.2
4
4
  Summary: Sourcerer is a terminal cloud storage navigator.
5
5
  Author-email: Bohdana Kuzmenko <bohdana.kuzmenko.dev@gmail.com>
6
6
  License: MIT
@@ -76,9 +76,6 @@ source = [
76
76
  "src/sourcerer/infrastructure",
77
77
  ]
78
78
 
79
- [tool.coverage.report]
80
- fail_under = 75
81
-
82
79
  [tool.ruff]
83
80
  line-length = 120
84
81
  target-version = "py312"
@@ -14,4 +14,4 @@ The application is structured using a clean architecture approach with:
14
14
 
15
15
  name = "sourcerer"
16
16
  package_name = "data_sourcerer"
17
- __version__ = "0.7.0"
17
+ __version__ = "0.7.2"
@@ -4,7 +4,7 @@ Base storage provider service interface.
4
4
  This module defines the abstract base class for storage provider services,
5
5
  providing a common interface for cloud storage operations.
6
6
  """
7
-
7
+ import threading
8
8
  from abc import ABC, abstractmethod
9
9
  from collections.abc import Callable
10
10
  from pathlib import Path
@@ -93,20 +93,28 @@ class BaseStorageProviderService(ABC):
93
93
  storage_path: str,
94
94
  source_path: Path,
95
95
  dest_path: str | None = None,
96
+ cancel_event: threading.Event | None = None,
97
+ progress_callback: Callable | None = None,
96
98
  ) -> None:
97
99
  """
98
- Upload a file to the specified storage path.
100
+ upload a file to the specified storage path.
99
101
 
100
102
  Args:
101
103
  storage (str): The storage identifier
102
104
  storage_path (str): The path within the storage to upload
103
105
  source_path (Path): Local file path to upload
104
106
  dest_path (str, optional): Destination path in storage. Defaults to None.
107
+ cancel_event (threading.Event, optional): Event to signal upload cancellation. Defaults to None.
108
+ progress_callback (callable, optional): Callback function for progress updates. Defaults to None.
105
109
  """
106
110
 
107
111
  @abstractmethod
108
112
  def download_storage_item(
109
- self, storage: str, key: str, progress_callback: Callable | None = None
113
+ self,
114
+ storage: str,
115
+ key: str,
116
+ progress_callback: Callable | None = None,
117
+ cancel_event: threading.Event | None = None,
110
118
  ) -> str:
111
119
  """
112
120
  Download a file from storage to local filesystem.
@@ -115,7 +123,7 @@ class BaseStorageProviderService(ABC):
115
123
  storage (str): The storage identifier
116
124
  key (str): The key/path of the item to download
117
125
  progress_callback (callable, optional): Callback function for progress updates. Defaults to None.
118
-
126
+ cancel_event (threading.Event, optional): Event to signal download cancellation. Defaults to None.
119
127
  Returns:
120
128
  str: Path to the downloaded file
121
129
  """
@@ -4,15 +4,19 @@ Implementation of Azure storage provider services.
4
4
  This module provides concrete implementations of the BaseStorageProviderService
5
5
  interface for various cloud storage providers.
6
6
  """
7
-
7
+ import asyncio
8
+ import base64
8
9
  import os.path
10
+ import shutil
11
+ import tempfile
9
12
  import threading
10
13
  from collections.abc import Callable
14
+ from concurrent.futures import ThreadPoolExecutor
11
15
  from pathlib import Path
12
16
  from typing import Any
13
17
 
14
18
  from azure.mgmt.storage import StorageManagementClient
15
- from azure.storage.blob import BlobServiceClient
19
+ from azure.storage.blob import BlobBlock, BlobServiceClient
16
20
  from cachetools import LRUCache
17
21
  from platformdirs import user_downloads_dir
18
22
 
@@ -35,6 +39,7 @@ from sourcerer.infrastructure.storage_provider.exceptions import (
35
39
  )
36
40
  from sourcerer.infrastructure.storage_provider.registry import storage_provider
37
41
  from sourcerer.infrastructure.utils import generate_uuid, is_text_file
42
+ from sourcerer.settings import DOWNLOAD_BLOCK_SIZE, MULTIPART_UPLOAD_BLOCK_SIZE
38
43
 
39
44
 
40
45
  @storage_provider(StorageProvider.AzureStorage)
@@ -141,13 +146,16 @@ class AzureStorageProviderService(BaseStorageProviderService):
141
146
  folders.update([i.name for i in containers_client.list_containers()])
142
147
  else:
143
148
  path_parts = path.split("/", 1)
144
- if len(path_parts) > 1:
145
- path, prefix = path_parts[0], path_parts[1] + "/" + prefix
146
- blobs_client = containers_client.get_container_client(path)
149
+
150
+ container = path_parts[0]
151
+ base_path = "" if len(path_parts) == 1 else path_parts[1] + "/"
152
+
153
+ blobs_client = containers_client.get_container_client(container)
154
+
147
155
  for blob in blobs_client.walk_blobs(
148
- name_starts_with=prefix, delimiter="/"
156
+ name_starts_with=base_path + prefix, delimiter="/"
149
157
  ):
150
- remaining_path = blob.name[len(prefix) :]
158
+ remaining_path = blob.name[len(base_path) :]
151
159
  if "/" in remaining_path:
152
160
  folder_name = remaining_path.split("/")[0]
153
161
  if folder_name not in folders:
@@ -158,8 +166,8 @@ class AzureStorageProviderService(BaseStorageProviderService):
158
166
  File(
159
167
  generate_uuid(),
160
168
  remaining_path,
161
- size=blob.size,
162
- date_modified=blob.last_modified,
169
+ size=blob.size, # type: ignore
170
+ date_modified=blob.last_modified, # type: ignore
163
171
  is_text=is_text_file(blob.name),
164
172
  )
165
173
  )
@@ -208,6 +216,8 @@ class AzureStorageProviderService(BaseStorageProviderService):
208
216
  storage_path: str,
209
217
  source_path: Path,
210
218
  dest_path: str | None = None,
219
+ cancel_event: threading.Event | None = None,
220
+ progress_callback: Callable | None = None,
211
221
  ) -> None:
212
222
  """
213
223
  Upload a file to the specified Azure container path.
@@ -216,6 +226,8 @@ class AzureStorageProviderService(BaseStorageProviderService):
216
226
  storage_path (str): The path within the container to upload
217
227
  source_path (Path): Local file path to upload
218
228
  dest_path (str, optional): Destination path in storage. Defaults to None.
229
+ cancel_event (threading.Event, optional): Event to signal upload cancellation. Defaults to None.
230
+ progress_callback (Callable, optional): Callback function for progress updates. Defaults to None.
219
231
  """
220
232
  try:
221
233
  if not storage_path:
@@ -232,16 +244,38 @@ class AzureStorageProviderService(BaseStorageProviderService):
232
244
  storage_path = storage_path_parts[1] if len(storage_path_parts) > 1 else ""
233
245
  blob_name = os.path.join(storage_path, dest_path or source_path.name)
234
246
 
235
- blob_client = containers_client.get_container_client(container)
236
- with open(source_path, "rb") as file_handle:
237
- blob_client.upload_blob(
238
- blob_name or source_path.name, file_handle, overwrite=True
239
- )
247
+ if source_path.stat().st_size <= MULTIPART_UPLOAD_BLOCK_SIZE:
248
+ blob_client = containers_client.get_container_client(container)
249
+ with open(source_path, "rb") as file_handle:
250
+ blob_client.upload_blob(
251
+ blob_name or source_path.name, file_handle, overwrite=True
252
+ )
253
+ if progress_callback:
254
+ progress_callback(source_path.stat().st_size)
255
+ else:
256
+ try:
257
+ run_async_sync_safe(
258
+ self.upload_multipart(
259
+ containers_client,
260
+ container,
261
+ source_path,
262
+ blob_name,
263
+ MULTIPART_UPLOAD_BLOCK_SIZE,
264
+ cancel_event,
265
+ progress_callback,
266
+ )
267
+ )
268
+ except Exception:
269
+ raise
240
270
  except Exception as ex:
241
271
  raise UploadStorageItemsError(str(ex)) from ex
242
272
 
243
273
  def download_storage_item(
244
- self, storage: str, key: str, progress_callback: Callable | None = None
274
+ self,
275
+ storage: str,
276
+ key: str,
277
+ progress_callback: Callable | None = None,
278
+ cancel_event: threading.Event | None = None,
245
279
  ) -> str:
246
280
  """
247
281
  Download a file from Azure to the local filesystem.
@@ -250,19 +284,52 @@ class AzureStorageProviderService(BaseStorageProviderService):
250
284
  storage (str): The container name
251
285
  key (str): The key/path of the item to download
252
286
  progress_callback (Callable, optional): Callback function for progress updates. Defaults to None.
287
+ cancel_event (threading.Event, optional): Event to signal download cancellation. Defaults to None.
253
288
  """
289
+ download_path = None
290
+ download_tmp_path = None
254
291
  try:
255
292
  download_path = Path(user_downloads_dir()) / Path(key).name
293
+ suffix = Path(key).suffix
294
+ download_tmp_path = (
295
+ Path(user_downloads_dir())
296
+ / f"{next(tempfile._get_candidate_names())}{suffix}" # type: ignore
297
+ )
256
298
 
257
299
  containers_client = self.get_containers_client(storage)
258
300
  path_parts = key.split("/", 1)
259
301
  container, blob_name = path_parts
260
302
  blob_client = containers_client.get_container_client(container)
261
- with open(download_path, "wb") as file:
262
- download_stream = blob_client.download_blob(blob_name)
263
- file.write(download_stream.readall())
303
+ blob_stream = blob_client.download_blob(blob_name)
304
+ total_bytes = blob_stream.properties.size
305
+
306
+ with open(download_tmp_path, "wb") as file:
307
+ if total_bytes <= DOWNLOAD_BLOCK_SIZE:
308
+ file.write(blob_stream.readall())
309
+ else:
310
+ downloaded = 0
311
+ while downloaded < total_bytes:
312
+ if cancel_event and cancel_event.is_set():
313
+ raise Exception("Download cancelled")
314
+
315
+ chunk = blob_stream.read(DOWNLOAD_BLOCK_SIZE)
316
+ if not chunk:
317
+ break
318
+
319
+ file.write(chunk)
320
+
321
+ chunk_size = len(chunk)
322
+ downloaded += chunk_size
323
+
324
+ if progress_callback:
325
+ progress_callback(chunk_size)
326
+ shutil.move(download_tmp_path, download_path)
264
327
  return str(download_path)
265
328
  except Exception as ex:
329
+ if download_path and download_path.exists():
330
+ download_path.unlink()
331
+ if download_tmp_path and download_tmp_path.exists():
332
+ download_tmp_path.unlink()
266
333
  raise ReadStorageItemsError(str(ex)) from ex
267
334
 
268
335
  def get_file_size(self, storage: str, key: str) -> int:
@@ -282,3 +349,53 @@ class AzureStorageProviderService(BaseStorageProviderService):
282
349
  return props.size
283
350
  except Exception as ex:
284
351
  raise ReadStorageItemsError(str(ex)) from ex
352
+
353
+ async def upload_multipart(
354
+ self,
355
+ client,
356
+ container: str,
357
+ source_path: Path,
358
+ blob_name: str,
359
+ block_size: int,
360
+ cancel_event=None,
361
+ progress_callback=None,
362
+ ):
363
+ max_workers = 8
364
+
365
+ blob_client = client.get_blob_client(container, blob_name)
366
+ semaphore = asyncio.Semaphore(max_workers)
367
+
368
+ async def upload_block(offset, data):
369
+ async with semaphore:
370
+ block_id = f"{offset:08d}"
371
+ encoded_block_id = base64.b64encode(block_id.encode()).decode()
372
+ blob_client.stage_block(block_id=encoded_block_id, data=data)
373
+ if progress_callback:
374
+ progress_callback(len(data))
375
+ if cancel_event and cancel_event.is_set():
376
+ raise Exception("Upload cancelled")
377
+ return BlobBlock(block_id=encoded_block_id)
378
+
379
+ async def read_and_upload():
380
+ tasks = []
381
+ with open(source_path, "rb") as f:
382
+ offset = 0
383
+ while chunk := f.read(block_size):
384
+ tasks.append(upload_block(offset, chunk))
385
+ offset += len(chunk)
386
+ if cancel_event and cancel_event.is_set():
387
+ raise Exception("Upload cancelled")
388
+ return await asyncio.gather(*tasks)
389
+
390
+ block_ids = await read_and_upload()
391
+ blob_client.commit_block_list(block_ids)
392
+
393
+
394
+ # Todo: tmp solution, we need to move to async
395
+ def run_async_sync_safe(coro):
396
+ def runner():
397
+ return asyncio.run(coro)
398
+
399
+ with ThreadPoolExecutor(1) as executor:
400
+ future = executor.submit(runner)
401
+ return future.result()
@@ -4,7 +4,9 @@ Implementation of GCP storage provider services.
4
4
  This module provides concrete implementations of the BaseStorageProviderService
5
5
  interface for various cloud storage providers.
6
6
  """
7
-
7
+ import shutil
8
+ import tempfile
9
+ import threading
8
10
  from collections.abc import Callable
9
11
  from pathlib import Path
10
12
  from typing import Any
@@ -31,7 +33,12 @@ from sourcerer.infrastructure.storage_provider.exceptions import (
31
33
  )
32
34
  from sourcerer.infrastructure.storage_provider.registry import storage_provider
33
35
  from sourcerer.infrastructure.utils import generate_uuid, is_text_file
34
- from sourcerer.settings import PAGE_SIZE, PATH_DELIMITER
36
+ from sourcerer.settings import (
37
+ DOWNLOAD_BLOCK_SIZE,
38
+ MULTIPART_UPLOAD_BLOCK_SIZE,
39
+ PAGE_SIZE,
40
+ PATH_DELIMITER,
41
+ )
35
42
 
36
43
 
37
44
  @storage_provider(StorageProvider.GoogleCloudStorage)
@@ -201,6 +208,8 @@ class GCPStorageProviderService(BaseStorageProviderService):
201
208
  storage_path: str,
202
209
  source_path: Path,
203
210
  dest_path: str | None = None,
211
+ cancel_event: threading.Event | None = None,
212
+ progress_callback: Callable | None = None,
204
213
  ) -> None:
205
214
  """
206
215
  Upload a file to the specified GCP bucket path.
@@ -210,6 +219,8 @@ class GCPStorageProviderService(BaseStorageProviderService):
210
219
  storage_path (str): The path within the bucket
211
220
  source_path (Path): Local file path to upload
212
221
  dest_path (str, optional): Destination path in GCP. Defaults to None.
222
+ cancel_event (threading.Event, optional): Event to signal upload cancellation. Defaults to None.
223
+ progress_callback (Callable, optional): Callback function for progress updates. Defaults to None.
213
224
 
214
225
  Raises:
215
226
  UploadStorageItemsError: If an error occurs while uploading the item
@@ -219,12 +230,22 @@ class GCPStorageProviderService(BaseStorageProviderService):
219
230
  storage_path = str(
220
231
  Path(storage_path or "") / (dest_path or source_path.name)
221
232
  )
222
- bucket.blob(storage_path).upload_from_filename(source_path)
233
+ blob = bucket.blob(storage_path)
234
+ if source_path.stat().st_size <= MULTIPART_UPLOAD_BLOCK_SIZE:
235
+ blob.upload_from_filename(source_path)
236
+ else:
237
+ self._upload_storage_item_multipart(
238
+ blob, source_path, cancel_event, progress_callback=progress_callback
239
+ )
223
240
  except Exception as ex:
224
241
  raise UploadStorageItemsError(str(ex)) from ex
225
242
 
226
243
  def download_storage_item(
227
- self, storage: str, key: str, progress_callback: Callable | None = None
244
+ self,
245
+ storage: str,
246
+ key: str,
247
+ progress_callback: Callable | None = None,
248
+ cancel_event: threading.Event | None = None,
228
249
  ) -> str:
229
250
  """
230
251
  Download a file from GCP to the local filesystem.
@@ -233,22 +254,53 @@ class GCPStorageProviderService(BaseStorageProviderService):
233
254
  storage (str): The bucket name
234
255
  key (str): The key/path of the item to download
235
256
  progress_callback (Callable, optional): Callback function for progress updates. Defaults to None.
236
-
257
+ cancel_event (threading.Event, optional): Event to signal download cancellation. Defaults to None.
237
258
  Returns:
238
259
  str: Path to the downloaded file
239
260
 
240
261
  Raises:
241
262
  ReadStorageItemsError: If an error occurs while downloading the item
242
263
  """
264
+ download_path = None
243
265
  try:
244
266
  bucket = self.client.bucket(storage)
245
267
  blob = bucket.get_blob(key)
246
268
  if not blob:
247
269
  raise BlobNotFoundError(key)
270
+
248
271
  download_path = Path(user_downloads_dir()) / Path(key).name
249
- blob.download_to_filename(str(download_path))
272
+
273
+ suffix = Path(key).suffix
274
+ download_tmp_path = (
275
+ Path(user_downloads_dir())
276
+ / f"{next(tempfile._get_candidate_names())}{suffix}" # type: ignore
277
+ )
278
+
279
+ downloaded = 0
280
+
281
+ with open(download_tmp_path, "wb") as file:
282
+ reader = blob.open("rb") # streaming mode
283
+ while True:
284
+ if cancel_event and cancel_event.is_set():
285
+ raise Exception("Download cancelled")
286
+
287
+ chunk = reader.read(DOWNLOAD_BLOCK_SIZE)
288
+ if not chunk:
289
+ break
290
+
291
+ file.write(chunk)
292
+ chunk_size = len(chunk)
293
+ downloaded += chunk_size
294
+
295
+ if progress_callback:
296
+ progress_callback(chunk_size)
297
+
298
+ shutil.move(download_tmp_path, download_path)
250
299
  return str(download_path)
300
+
251
301
  except Exception as ex:
302
+ if download_path and Path(download_path).exists():
303
+ Path(download_path).unlink()
252
304
  raise ReadStorageItemsError(str(ex)) from ex
253
305
 
254
306
  def get_file_size(self, storage: str, key: str) -> int:
@@ -273,3 +325,78 @@ class GCPStorageProviderService(BaseStorageProviderService):
273
325
  return blob.size
274
326
  except Exception as ex:
275
327
  raise ReadStorageItemsError(str(ex)) from ex
328
+
329
def _upload_storage_item_multipart(
    self,
    blob,
    source_path,
    cancel_event: threading.Event | None = None,
    progress_callback: Callable | None = None,
):
    """
    Upload a local file to the given blob through a cancellable, chunked reader.

    The file is wrapped in a CancelableFileReader so that every read performed
    during the upload first checks `cancel_event` and reports the number of
    bytes read to `progress_callback`.

    Args:
        blob: Target blob object; its `chunk_size` is set and its
            `upload_from_file` method is called (presumably a
            google.cloud.storage Blob - confirm against the caller)
        source_path: Local path of the file to upload
        cancel_event (threading.Event, optional): Event to signal upload cancellation. Defaults to None.
        progress_callback (Callable, optional): Callback function for upload progress. Defaults to None.
    """
    block_size = MULTIPART_UPLOAD_BLOCK_SIZE

    # The same block size is used for the blob's chunking and for the reader's
    # default read size.
    blob.chunk_size = block_size

    reader = CancelableFileReader(
        source_path,
        cancel_event,
        chunk_size=block_size,
        progress_callback=progress_callback,
    )
    with reader as stream:
        blob.upload_from_file(
            stream,
            rewind=True,  # allow re-seek to beginning if needed
            content_type="application/octet-stream",
        )
354
+
355
+
356
+ class CancelableFileReader:
357
+ def __init__(
358
+ self,
359
+ file_path,
360
+ cancel_event: threading.Event | None,
361
+ chunk_size,
362
+ progress_callback: Callable | None = None,
363
+ ):
364
+ self.file_path = file_path
365
+ self.file = None
366
+ self.cancel_event = cancel_event
367
+ self.chunk_size = chunk_size
368
+ self.progress_callback = progress_callback
369
+
370
+ def read(self, size=None):
371
+ if self.cancel_event and self.cancel_event.is_set():
372
+ raise RuntimeError("Upload cancelled")
373
+
374
+ if self.file is None:
375
+ raise RuntimeError("File is not opened")
376
+ chunk_size = size or self.chunk_size
377
+ data = self.file.read(chunk_size)
378
+ if data and self.progress_callback:
379
+ self.progress_callback(len(data))
380
+ return data
381
+
382
+ def seek(self, offset, whence=0):
383
+ if self.file is None:
384
+ raise RuntimeError("File is not opened")
385
+ return self.file.seek(offset, whence)
386
+
387
+ def tell(self):
388
+ if self.file is None:
389
+ raise RuntimeError("File is not opened")
390
+ return self.file.tell()
391
+
392
+ def close(self):
393
+ if self.file is None:
394
+ return None
395
+ return self.file.close()
396
+
397
+ def __enter__(self):
398
+ self.file = open(self.file_path, "rb")
399
+ return self
400
+
401
+ def __exit__(self, exc_type, exc_val, exc_tb):
402
+ self.close()
@@ -4,7 +4,9 @@ Implementation of S3 compatible storage provider services.
4
4
  This module provides concrete implementations of the BaseStorageProviderService
5
5
  interface for various cloud storage providers.
6
6
  """
7
-
7
+ import shutil
8
+ import tempfile
9
+ import threading
8
10
  from collections.abc import Callable
9
11
  from itertools import groupby
10
12
  from pathlib import Path
@@ -32,7 +34,7 @@ from sourcerer.infrastructure.storage_provider.exceptions import (
32
34
  )
33
35
  from sourcerer.infrastructure.storage_provider.registry import storage_provider
34
36
  from sourcerer.infrastructure.utils import generate_uuid, is_text_file
35
- from sourcerer.settings import PAGE_SIZE, PATH_DELIMITER
37
+ from sourcerer.settings import MULTIPART_UPLOAD_BLOCK_SIZE, PAGE_SIZE, PATH_DELIMITER
36
38
 
37
39
 
38
40
  @storage_provider(StorageProvider.S3)
@@ -223,6 +225,8 @@ class S3ProviderService(BaseStorageProviderService):
223
225
  storage_path: str,
224
226
  source_path: Path,
225
227
  dest_path: str | None = None,
228
+ cancel_event: threading.Event | None = None,
229
+ progress_callback: Callable | None = None,
226
230
  ) -> None:
227
231
  """
228
232
  Upload a file to the specified S3 bucket path.
@@ -232,18 +236,34 @@ class S3ProviderService(BaseStorageProviderService):
232
236
  storage_path (str): The path within the bucket
233
237
  source_path (Path): Local file path to upload
234
238
  dest_path (str, optional): Destination path in S3. Defaults to None.
239
+ cancel_event (threading.Event, optional): Event to signal upload cancellation. Defaults to None.
240
+ progress_callback (Callable, optional): Callback function for upload progress. Defaults to None.
235
241
 
236
242
  Raises:
237
243
  UploadStorageItemsError: If an error occurs while uploading the item
238
244
  """
239
245
  try:
240
246
  dest_path = str(Path(storage_path or "") / (dest_path or source_path.name))
241
- self.client.upload_file(source_path, storage, dest_path)
247
+ if source_path.stat().st_size <= MULTIPART_UPLOAD_BLOCK_SIZE:
248
+ self.client.upload_file(source_path, storage, dest_path)
249
+ else:
250
+ self._upload_storage_item_multipart(
251
+ source_path,
252
+ storage,
253
+ dest_path,
254
+ MULTIPART_UPLOAD_BLOCK_SIZE,
255
+ cancel_event,
256
+ progress_callback,
257
+ )
242
258
  except Exception as ex:
243
259
  raise UploadStorageItemsError(str(ex)) from ex
244
260
 
245
261
  def download_storage_item(
246
- self, storage: str, key: str, progress_callback: Callable | None = None
262
+ self,
263
+ storage: str,
264
+ key: str,
265
+ progress_callback: Callable | None = None,
266
+ cancel_event: threading.Event | None = None,
247
267
  ) -> str:
248
268
  """
249
269
  Download a file from S3 to local filesystem.
@@ -259,11 +279,25 @@ class S3ProviderService(BaseStorageProviderService):
259
279
  Raises:
260
280
  ReadStorageItemsError: If an error occurs while downloading the item
261
281
  """
282
+
283
+ def callback(size):
284
+ if progress_callback:
285
+ progress_callback(size)
286
+ if cancel_event and cancel_event.is_set():
287
+ raise ReadStorageItemsError("Download cancelled")
288
+
262
289
  try:
263
290
  download_path = Path(user_downloads_dir()) / Path(key).name
291
+ suffix = Path(key).suffix
292
+ download_tmp_path = (
293
+ Path(user_downloads_dir())
294
+ / f"{next(tempfile._get_candidate_names())}{suffix}" # type: ignore
295
+ )
296
+
264
297
  self.client.download_file(
265
- storage, key, download_path, Callback=progress_callback
298
+ storage, key, download_tmp_path, Callback=callback
266
299
  )
300
+ shutil.move(download_tmp_path, download_path)
267
301
  return str(download_path)
268
302
  except Exception as ex:
269
303
  raise ReadStorageItemsError(str(ex)) from ex
@@ -287,3 +321,68 @@ class S3ProviderService(BaseStorageProviderService):
287
321
  return metadata.get("ContentLength")
288
322
  except Exception as ex:
289
323
  raise ReadStorageItemsError(str(ex)) from ex
324
+
325
def _upload_storage_item_multipart(
    self,
    source_path,
    storage: str,
    dest_path: str,
    block_size: int,
    cancel_event: threading.Event | None = None,
    progress_callback: Callable | None = None,
):
    """
    Upload a file to S3 part by part using the multipart upload API.

    The file is read in `block_size` chunks; each chunk is sent as one part.
    Cancellation is checked before every part and once more before the
    upload is completed. On any error (including cancellation) the multipart
    upload is aborted and the original error is re-raised.

    Args:
        source_path: Local path of the file to upload
        storage (str): The bucket name
        dest_path (str): Destination key in the bucket
        block_size (int): Number of bytes read and uploaded per part
        cancel_event (threading.Event, optional): Event to signal upload cancellation. Defaults to None.
        progress_callback (Callable, optional): Called with the byte count of each uploaded part. Defaults to None.

    Raises:
        UploadStorageItemsError: If cancellation was requested
        Exception: Any error raised while reading the file or calling the client
    """
    upload_id = None
    try:
        parts = []

        with open(source_path, "rb") as file_handle:
            # Initiate multipart upload
            response = self.client.create_multipart_upload(
                Bucket=storage, Key=dest_path
            )
            upload_id = response["UploadId"]

            part_number = 1

            while chunk := file_handle.read(block_size):
                if cancel_event and cancel_event.is_set():
                    raise UploadStorageItemsError("Upload cancelled")

                part_response = self.client.upload_part(
                    Bucket=storage,
                    Key=dest_path,
                    PartNumber=part_number,
                    UploadId=upload_id,
                    Body=chunk,
                )

                parts.append(
                    {
                        "PartNumber": part_number,
                        "ETag": part_response["ETag"],
                    }
                )

                if progress_callback:
                    progress_callback(len(chunk))

                part_number += 1

        # Finalize upload; a cancel that arrived during the last part must
        # still prevent the object from being created.
        if cancel_event and cancel_event.is_set():
            raise UploadStorageItemsError("Upload canceled before completion")

        self.client.complete_multipart_upload(
            Bucket=storage,
            Key=dest_path,
            UploadId=upload_id,
            MultipartUpload={"Parts": parts},
        )

    except Exception:
        # Abort multipart if error or cancel
        if upload_id:
            try:
                self.client.abort_multipart_upload(
                    Bucket=storage, Key=dest_path, UploadId=upload_id
                )
            except Exception:
                # Best-effort cleanup: a failing abort must not replace the
                # original error (or cancellation) that the caller reports.
                pass
        raise