data-sourcerer 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {data_sourcerer-0.1.0.dist-info → data_sourcerer-0.2.0.dist-info}/METADATA +25 -4
  2. {data_sourcerer-0.1.0.dist-info → data_sourcerer-0.2.0.dist-info}/RECORD +53 -50
  3. {data_sourcerer-0.1.0.dist-info → data_sourcerer-0.2.0.dist-info}/WHEEL +1 -1
  4. sourcerer/__init__.py +1 -1
  5. sourcerer/domain/access_credentials/entities.py +17 -0
  6. sourcerer/domain/access_credentials/exceptions.py +1 -1
  7. sourcerer/domain/access_credentials/repositories.py +1 -1
  8. sourcerer/domain/access_credentials/services.py +14 -2
  9. sourcerer/domain/file_system/exceptions.py +1 -1
  10. sourcerer/domain/file_system/services.py +2 -2
  11. sourcerer/domain/shared/entities.py +1 -0
  12. sourcerer/domain/storage_provider/entities.py +3 -4
  13. sourcerer/domain/storage_provider/exceptions.py +1 -1
  14. sourcerer/domain/storage_provider/services.py +13 -9
  15. sourcerer/infrastructure/access_credentials/exceptions.py +15 -2
  16. sourcerer/infrastructure/access_credentials/registry.py +3 -4
  17. sourcerer/infrastructure/access_credentials/services.py +141 -44
  18. sourcerer/infrastructure/db/models.py +1 -1
  19. sourcerer/infrastructure/file_system/exceptions.py +9 -9
  20. sourcerer/infrastructure/file_system/services.py +16 -16
  21. sourcerer/infrastructure/storage_provider/exceptions.py +28 -8
  22. sourcerer/infrastructure/storage_provider/registry.py +2 -3
  23. sourcerer/infrastructure/storage_provider/services/__init__.py +0 -0
  24. sourcerer/infrastructure/storage_provider/services/azure.py +261 -0
  25. sourcerer/infrastructure/storage_provider/services/gcp.py +277 -0
  26. sourcerer/infrastructure/storage_provider/services/s3.py +290 -0
  27. sourcerer/infrastructure/utils.py +2 -4
  28. sourcerer/presentation/screens/critical_error/main.py +3 -4
  29. sourcerer/presentation/screens/file_system_finder/main.py +4 -4
  30. sourcerer/presentation/screens/file_system_finder/widgets/file_system_navigator.py +12 -12
  31. sourcerer/presentation/screens/main/main.py +57 -33
  32. sourcerer/presentation/screens/main/messages/delete_request.py +1 -2
  33. sourcerer/presentation/screens/main/messages/download_request.py +1 -2
  34. sourcerer/presentation/screens/main/mixins/resize_containers_watcher_mixin.py +3 -3
  35. sourcerer/presentation/screens/main/widgets/gradient.py +2 -5
  36. sourcerer/presentation/screens/main/widgets/resizing_rule.py +1 -1
  37. sourcerer/presentation/screens/main/widgets/storage_content.py +12 -13
  38. sourcerer/presentation/screens/main/widgets/storage_list_sidebar.py +8 -6
  39. sourcerer/presentation/screens/preview_content/main.py +15 -4
  40. sourcerer/presentation/screens/preview_content/styles.tcss +2 -1
  41. sourcerer/presentation/screens/provider_creds_list/main.py +2 -2
  42. sourcerer/presentation/screens/provider_creds_registration/main.py +26 -11
  43. sourcerer/presentation/screens/question/main.py +1 -1
  44. sourcerer/presentation/screens/shared/containers.py +1 -1
  45. sourcerer/presentation/screens/shared/widgets/labeled_input.py +1 -1
  46. sourcerer/presentation/screens/storage_action_progress/main.py +34 -20
  47. sourcerer/presentation/screens/storage_action_progress/styles.tcss +11 -0
  48. sourcerer/presentation/utils.py +7 -3
  49. sourcerer/settings.py +4 -0
  50. sourcerer/utils.py +2 -2
  51. sourcerer/infrastructure/storage_provider/services.py +0 -509
  52. {data_sourcerer-0.1.0.dist-info → data_sourcerer-0.2.0.dist-info}/entry_points.txt +0 -0
  53. {data_sourcerer-0.1.0.dist-info → data_sourcerer-0.2.0.dist-info}/licenses/LICENSE +0 -0
  54. {data_sourcerer-0.1.0.dist-info → data_sourcerer-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,261 @@
1
+ """
2
+ Implementation of Azure storage provider services.
3
+
4
+ This module provides the concrete implementation of the BaseStorageProviderService
5
+ interface for Azure Blob Storage.
6
+ """
7
+
8
+ import os.path
9
+ from collections.abc import Callable
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ import humanize
14
+ from azure.mgmt.storage import StorageManagementClient
15
+ from azure.storage.blob import BlobServiceClient
16
+ from platformdirs import user_downloads_dir
17
+
18
+ from sourcerer.domain.shared.entities import StorageProvider
19
+ from sourcerer.domain.storage_provider.entities import (
20
+ File,
21
+ Folder,
22
+ Storage,
23
+ StorageContent,
24
+ StoragePermissions,
25
+ )
26
+ from sourcerer.domain.storage_provider.services import BaseStorageProviderService
27
+ from sourcerer.infrastructure.storage_provider.exceptions import (
28
+ AzureMissingContainerError,
29
+ DeleteStorageItemsError,
30
+ ListStorageItemsError,
31
+ ListStoragesError,
32
+ ReadStorageItemsError,
33
+ UploadStorageItemsError,
34
+ )
35
+ from sourcerer.infrastructure.storage_provider.registry import storage_provider
36
+ from sourcerer.infrastructure.utils import generate_uuid, is_text_file
37
+
38
+
39
@storage_provider(StorageProvider.AzureStorage)
class AzureStorageProviderService(BaseStorageProviderService):
    """
    Azure Blob Storage implementation of BaseStorageProviderService.

    Throughout this class ``storage`` is the name of the storage account.
    The container is the first ``/``-separated segment of a path or key and
    the remainder is the blob name (or blob name prefix).
    """

    def __init__(self, credentials: Any):
        """
        Initialize the service with Azure credentials.

        Args:
            credentials (Any): Object exposing ``credentials``,
                ``subscription_id`` and ``cloud_suffix`` attributes
        """
        self.credentials = credentials.credentials
        self.subscription_id = credentials.subscription_id
        self.cloud_suffix = credentials.cloud_suffix

    @staticmethod
    def _split_container_key(key: str) -> tuple[str, str]:
        """
        Split ``<container>/<blob name>`` into its two parts.

        Args:
            key (str): Container name and blob name joined by the first "/"

        Returns:
            tuple[str, str]: The container name and the blob name

        Raises:
            ValueError: If the key contains no "/" separator
        """
        container, separator, blob_name = key.partition("/")
        if not separator:
            raise ValueError(
                f"Expected key in '<container>/<blob>' format, got: {key!r}"
            )
        return container, blob_name

    def get_accounts_client(self) -> StorageManagementClient:
        """
        Get the Azure storage management client.

        Returns:
            StorageManagementClient: Client built from the stored credentials
                and subscription id
        """
        return StorageManagementClient(self.credentials, self.subscription_id)

    def get_containers_client(self, storage: str) -> BlobServiceClient:
        """
        Build a BlobServiceClient for a specific Azure storage account.

        Args:
            storage (str): The name of the Azure storage account to connect to

        Returns:
            BlobServiceClient: Client configured with the account URL
                (``https://<storage>.<cloud_suffix>``) and the stored credentials
        """
        account_url = f"https://{storage}.{self.cloud_suffix}"
        return BlobServiceClient(
            account_url,
            credential=self.credentials,
            retry_connect=0,
        )

    def list_storages(self) -> list[Storage]:
        """
        Return the storage accounts reported by the management client.

        Returns:
            List[Storage]: One Storage object per Azure storage account

        Raises:
            ListStoragesError: If an error occurs while listing the accounts
        """
        try:
            accounts_client = self.get_accounts_client()
            return [
                Storage(StorageProvider.AzureStorage, account.name, account.creation_time)  # type: ignore
                for account in accounts_client.storage_accounts.list()
            ]
        except Exception as ex:
            raise ListStoragesError(str(ex)) from ex

    def get_storage_permissions(self, storage: str) -> list[StoragePermissions]:
        """
        Permissions lookup is not supported for Azure.

        Raises:
            NotImplementedError: Always
        """
        raise NotImplementedError("Not implemented")

    def list_storage_items(
        self, storage: str, path: str, prefix: str
    ) -> StorageContent:
        """
        List the items under a path of an Azure storage account.

        With an empty ``path`` the containers of the account are returned as
        folders. Otherwise the first segment of ``path`` selects the container
        and the rest of ``path`` is prepended to ``prefix``.

        Args:
            storage (str): The storage account name
            path (str): ``<container>[/<sub path>]`` to list, or "" for the
                account root
            prefix (str): Filter items by this prefix

        Returns:
            StorageContent: Files and folders found at the requested location

        Raises:
            ListStorageItemsError: If an error occurs while listing items
        """
        try:
            containers_client = self.get_containers_client(storage)
            files = []
            folders = set()

            if not path:
                folders.update(
                    container.name for container in containers_client.list_containers()
                )
            else:
                container_name, separator, sub_path = path.partition("/")
                if separator:
                    prefix = sub_path + "/" + prefix
                blobs_client = containers_client.get_container_client(container_name)
                for blob in blobs_client.walk_blobs(
                    name_starts_with=prefix, delimiter="/"
                ):
                    remaining_path = blob.name[len(prefix) :]
                    if "/" in remaining_path:
                        # Anything below the current level is reported once,
                        # as a folder; the set removes duplicates.
                        folders.add(remaining_path.split("/")[0])
                        continue

                    files.append(
                        File(
                            generate_uuid(),
                            remaining_path,
                            size=humanize.naturalsize(blob.size),
                            date_modified=blob.last_modified,
                            is_text=is_text_file(blob.name),
                        )
                    )
            return StorageContent(
                files=files, folders=[Folder(name) for name in folders]
            )
        except Exception as ex:
            raise ListStorageItemsError(str(ex)) from ex

    def read_storage_item(self, storage: str, key: str) -> str:
        """
        Read a blob and return its content decoded as UTF-8.

        Args:
            storage (str): The storage account name
            key (str): ``<container>/<blob name>`` of the item to read

        Returns:
            str: The UTF-8 decoded content of the blob

        Raises:
            ReadStorageItemsError: If the key is malformed or reading fails
        """
        try:
            container, blob_name = self._split_container_key(key)
            blobs_client = self.get_containers_client(storage).get_container_client(
                container
            )
            content = blobs_client.download_blob(blob_name).readall()
            return content.decode("utf-8")
        except Exception as ex:
            raise ReadStorageItemsError(str(ex)) from ex

    def delete_storage_item(self, storage: str, key: str) -> None:
        """
        Delete a blob.

        Args:
            storage (str): The storage account name
            key (str): ``<container>/<blob name>`` of the item to delete

        Raises:
            DeleteStorageItemsError: If the key is malformed or deletion fails
        """
        try:
            container, blob_name = self._split_container_key(key)
            blobs_client = self.get_containers_client(storage).get_container_client(
                container
            )
            blobs_client.delete_blob(blob_name)
        except Exception as ex:
            raise DeleteStorageItemsError(str(ex)) from ex

    def upload_storage_item(
        self,
        storage: str,
        storage_path: str,
        source_path: Path,
        dest_path: str | None = None,
    ) -> None:
        """
        Upload a local file to a container, overwriting an existing blob.

        Args:
            storage (str): The storage account name
            storage_path (str): ``<container>[/<folder>]`` to upload into
            source_path (Path): Local file path to upload
            dest_path (str, optional): Blob name relative to ``storage_path``.
                Defaults to the source file name.

        Raises:
            UploadStorageItemsError: If no container is given (the
                AzureMissingContainerError is wrapped) or the upload fails
        """
        # Blob names always use "/" as separator; os.path.join would produce
        # backslashes on Windows, so join with posixpath on every platform.
        import posixpath

        try:
            if not storage_path:
                raise AzureMissingContainerError("Container is required for Azure storage")

            container, _, folder = storage_path.partition("/")
            blob_name = posixpath.join(folder, dest_path or source_path.name)

            blobs_client = self.get_containers_client(storage).get_container_client(
                container
            )
            with open(source_path, "rb") as file_handle:
                blobs_client.upload_blob(blob_name, file_handle, overwrite=True)
        except Exception as ex:
            raise UploadStorageItemsError(str(ex)) from ex

    def download_storage_item(
        self, storage: str, key: str, progress_callback: Callable | None = None
    ) -> str:
        """
        Download a blob into the user's downloads directory.

        Args:
            storage (str): The storage account name
            key (str): ``<container>/<blob name>`` of the item to download
            progress_callback (Callable, optional): Accepted for interface
                compatibility; not used by this implementation.

        Returns:
            str: Path of the downloaded file

        Raises:
            ReadStorageItemsError: If the key is malformed or the download fails
        """
        try:
            container, blob_name = self._split_container_key(key)
            download_path = Path(user_downloads_dir()) / Path(key).name

            blobs_client = self.get_containers_client(storage).get_container_client(
                container
            )
            # Request the blob before creating the local file so that a failed
            # request (e.g. missing blob) does not leave an empty file behind.
            download_stream = blobs_client.download_blob(blob_name)
            with open(download_path, "wb") as file:
                # Stream into the file instead of buffering the whole blob.
                download_stream.readinto(file)
            return str(download_path)
        except Exception as ex:
            raise ReadStorageItemsError(str(ex)) from ex

    def get_file_size(self, storage: str, key: str) -> int:
        """
        Return the size of a blob from its properties, without downloading it.

        Args:
            storage (str): The storage account name
            key (str): ``<container>/<blob name>`` of the item

        Returns:
            int: The ``size`` reported by the blob properties

        Raises:
            ReadStorageItemsError: If the key is malformed or the lookup fails
        """
        try:
            container, blob_name = self._split_container_key(key)
            blob_client = self.get_containers_client(storage).get_blob_client(
                container, blob_name
            )
            return blob_client.get_blob_properties().size
        except Exception as ex:
            raise ReadStorageItemsError(str(ex)) from ex
@@ -0,0 +1,277 @@
1
+ """
2
+ Implementation of GCP storage provider services.
3
+
4
+ This module provides the concrete implementation of the BaseStorageProviderService
5
+ interface for Google Cloud Storage.
6
+ """
7
+
8
+ from collections.abc import Callable
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import humanize
13
+ from platformdirs import user_downloads_dir
14
+
15
+ from sourcerer.domain.shared.entities import StorageProvider
16
+ from sourcerer.domain.storage_provider.entities import (
17
+ File,
18
+ Folder,
19
+ Storage,
20
+ StorageContent,
21
+ StoragePermissions,
22
+ )
23
+ from sourcerer.domain.storage_provider.services import BaseStorageProviderService
24
+ from sourcerer.infrastructure.storage_provider.exceptions import (
25
+ BlobNotFoundError,
26
+ DeleteStorageItemsError,
27
+ ListStorageItemsError,
28
+ ListStoragesError,
29
+ ReadStorageItemsError,
30
+ StoragePermissionError,
31
+ UploadStorageItemsError,
32
+ )
33
+ from sourcerer.infrastructure.storage_provider.registry import storage_provider
34
+ from sourcerer.infrastructure.utils import generate_uuid, is_text_file
35
+ from sourcerer.settings import PAGE_SIZE, PATH_DELIMITER
36
+
37
+
38
@storage_provider(StorageProvider.GoogleCloudStorage)
class GCPStorageProviderService(BaseStorageProviderService):
    """
    Google Cloud Platform storage provider service implementation.

    This class provides methods for interacting with GCP Cloud Storage,
    implementing the BaseStorageProviderService interface.
    """

    def __init__(self, credentials: Any):
        """
        Initialize the service with GCP credentials.

        Args:
            credentials (Any): GCP client object; it is stored and used
                directly as the storage client
        """
        self.client = credentials

    def _get_existing_blob(self, storage: str, key: str):
        """
        Fetch a blob and fail if it does not exist.

        Args:
            storage (str): The bucket name
            key (str): The key/path of the item

        Returns:
            The blob object returned by the bucket

        Raises:
            BlobNotFoundError: If the bucket returns no blob for ``key``
        """
        blob = self.client.bucket(storage).get_blob(key)
        if not blob:
            raise BlobNotFoundError(key)
        return blob

    def list_storages(self) -> list[Storage]:
        """
        Return a list of available GCP buckets.

        Returns:
            List[Storage]: List of storage objects representing GCP buckets

        Raises:
            ListStoragesError: If an error occurs while listing buckets
        """
        try:
            return [
                Storage(
                    StorageProvider.GoogleCloudStorage, bucket.name, bucket.time_created
                )
                for bucket in self.client.list_buckets()
            ]
        except Exception as ex:
            raise ListStoragesError(str(ex)) from ex

    def get_storage_permissions(self, storage: str) -> list[StoragePermissions]:
        """
        Return the permissions for the specified GCP bucket.

        The bucket IAM policy (role -> members) is inverted into
        member -> set of roles; the part of each member before the last ":"
        is dropped.

        Args:
            storage (str): The bucket name

        Returns:
            List[StoragePermissions]: One permission object per member

        Raises:
            StoragePermissionError: If an error occurs while getting permissions
        """
        try:
            policy = self.client.get_bucket(storage).get_iam_policy()

            roles_by_member: dict[str, set] = {}
            for role, members in policy.items():
                for member in members:
                    member_name = member.split(":")[-1]
                    roles_by_member.setdefault(member_name, set()).add(role)
            return [
                StoragePermissions(member_name, roles)
                for member_name, roles in roles_by_member.items()
            ]
        except Exception as ex:
            raise StoragePermissionError(str(ex)) from ex

    def list_storage_items(
        self, storage: str, path: str = "", prefix: str = ""
    ) -> StorageContent:
        """
        List items in the specified GCP bucket path with the given prefix.

        At most PAGE_SIZE results are requested from the bucket.

        Args:
            storage (str): The bucket name
            path (str, optional): The path within the bucket. Defaults to ''.
            prefix (str, optional): Filter items by this prefix. Defaults to ''.

        Returns:
            StorageContent: Object containing files and folders at the specified
                location, with names relative to ``path``

        Raises:
            ListStorageItemsError: If an error occurs while listing items
        """
        try:
            if path and not path.endswith("/"):
                path += "/"

            bucket = self.client.bucket(storage)
            blobs = bucket.list_blobs(
                prefix=path + prefix, delimiter=PATH_DELIMITER, max_results=PAGE_SIZE
            )

            files = [
                File(
                    generate_uuid(),
                    blob.name[len(path) :],
                    size=humanize.naturalsize(blob.size),
                    date_modified=blob.updated.date(),
                    is_text=is_text_file(blob.name),
                )
                for blob in blobs
            ]

            # NOTE(review): prefixes are read only after the blobs have been
            # fully iterated, as in the original ordering; presumably the
            # iterator fills `prefixes` while pages are consumed - confirm
            # against the google-cloud-storage documentation.
            folders = [Folder(folder[len(path) :]) for folder in blobs.prefixes]

            return StorageContent(files=files, folders=folders)

        except Exception as ex:
            raise ListStorageItemsError(
                f"Failed to list items in {storage}: {str(ex)}"
            ) from ex

    def read_storage_item(self, storage: str, key: str) -> str:
        """
        Read and return the content of the specified GCP object.

        Args:
            storage (str): The bucket name
            key (str): The key/path of the item to read

        Returns:
            str: The UTF-8 decoded content of the GCP object

        Raises:
            ReadStorageItemsError: If the item is missing or cannot be read
        """
        try:
            content = self._get_existing_blob(storage, key).download_as_bytes()
            return content.decode("utf-8")
        except Exception as ex:
            raise ReadStorageItemsError(str(ex)) from ex

    def delete_storage_item(self, storage: str, key: str) -> None:
        """
        Delete the specified GCP object.

        Args:
            storage (str): The bucket name
            key (str): The key/path of the item to delete

        Raises:
            DeleteStorageItemsError: If the item is missing or cannot be deleted
        """
        try:
            self._get_existing_blob(storage, key).delete()
        except Exception as ex:
            raise DeleteStorageItemsError(str(ex)) from ex

    def upload_storage_item(
        self,
        storage: str,
        storage_path: str,
        source_path: Path,
        dest_path: str | None = None,
    ) -> None:
        """
        Upload a file to the specified GCP bucket path.

        Args:
            storage (str): The bucket name
            storage_path (str): The path within the bucket
            source_path (Path): Local file path to upload
            dest_path (str, optional): Destination path in GCP, relative to
                ``storage_path``. Defaults to the source file name.

        Raises:
            UploadStorageItemsError: If an error occurs while uploading the item
        """
        # Object names always use "/" as separator; a platform Path would
        # render backslashes on Windows, so build the name with PurePosixPath.
        from pathlib import PurePosixPath

        try:
            bucket = self.client.bucket(storage)
            object_name = str(
                PurePosixPath(storage_path or "") / (dest_path or source_path.name)
            )
            bucket.blob(object_name).upload_from_filename(str(source_path))
        except Exception as ex:
            raise UploadStorageItemsError(str(ex)) from ex

    def download_storage_item(
        self, storage: str, key: str, progress_callback: Callable | None = None
    ) -> str:
        """
        Download a file from GCP to the user's downloads directory.

        Args:
            storage (str): The bucket name
            key (str): The key/path of the item to download
            progress_callback (Callable, optional): Accepted for interface
                compatibility; not used by this implementation.

        Returns:
            str: Path to the downloaded file

        Raises:
            ReadStorageItemsError: If the item is missing or cannot be downloaded
        """
        try:
            blob = self._get_existing_blob(storage, key)
            download_path = Path(user_downloads_dir()) / Path(key).name
            blob.download_to_filename(str(download_path))
            return str(download_path)
        except Exception as ex:
            raise ReadStorageItemsError(str(ex)) from ex

    def get_file_size(self, storage: str, key: str) -> int:
        """
        Get the size of a GCP object without downloading its content.

        Args:
            storage (str): The bucket name
            key (str): The key/path of the item

        Returns:
            int: The ``size`` reported by the blob

        Raises:
            ReadStorageItemsError: If the item is missing or the lookup fails
        """
        try:
            return self._get_existing_blob(storage, key).size
        except Exception as ex:
            raise ReadStorageItemsError(str(ex)) from ex