bw-essentials-core 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bw-essentials-core might be problematic; details are provided below.

@@ -0,0 +1,361 @@
1
+ """
2
+ Module for interacting with AWS S3.
3
+
4
+ This module contains the S3Utils class, which provides utility functions for:
5
+ - Uploading and downloading files
6
+ - Listing objects
7
+ - Reading content
8
+ - Moving and deleting files
9
+ - Checking file existence
10
+
11
+ Supports custom S3 endpoints (e.g., for localstack or MinIO).
12
+ """
13
+
14
+ import logging
15
+ import os
16
+ import sys
17
+ from importlib.util import spec_from_file_location, module_from_spec
18
+ from typing import Optional, List
19
+
20
+ import boto3
21
+ from botocore.exceptions import ClientError
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class S3Utils:
    """
    Utility class for managing AWS S3 operations.

    Configuration is loaded from a ``bw_config.py`` file in the current
    working directory; the required keys are ``S3_ACCESS_KEY``,
    ``S3_SECRET_KEY`` and ``S3_ENDPOINT_URL``. Supports custom S3 endpoints
    (e.g. for localstack or MinIO).
    """

    def __init__(self):
        """
        Initialize S3Utils with credentials loaded from bw_config.py and
        create the underlying boto3 S3 client.

        Raises:
            FileNotFoundError: If bw_config.py is not found in the working directory.
            AttributeError: If a required key is not defined in the config.
        """
        self.access_key = self._get_env_var("S3_ACCESS_KEY")
        # Bug fix: the secret key was previously read from S3_ACCESS_KEY as
        # well, so the client was always built with the access key doubling
        # as the secret key.
        self.secret_key = self._get_env_var("S3_SECRET_KEY")
        self.endpoint_url = self._get_env_var("S3_ENDPOINT_URL")
        self.s3_instance = self._get_s3_instance()

    def _get_env_var(self, key: str) -> str:
        """
        Fetch a required variable from bw_config.py located in the root
        (current working) directory.

        Args:
            key (str): Name of the config attribute to read.

        Raises:
            FileNotFoundError: If bw_config.py is not found.
            AttributeError: If the requested key is not defined in the config.

        Returns:
            str: The value of the config variable.
        """
        config_path = os.path.join(os.getcwd(), "bw_config.py")

        if not os.path.exists(config_path):
            raise FileNotFoundError("`bw_config.py` file not found in the root directory. "
                                    "Please ensure the config file exists.")

        # Load bw_config.py as a module so plain `KEY = value` assignments
        # in the file become attributes we can read with getattr().
        spec = spec_from_file_location("bw_config", config_path)
        bw_config = module_from_spec(spec)
        sys.modules["bw_config"] = bw_config
        spec.loader.exec_module(bw_config)

        if not hasattr(bw_config, key):
            raise AttributeError(f"`{key}` not found in bw_config.py. Please define it in the config.")

        return getattr(bw_config, key)

    def _get_s3_instance(self):
        """
        Create and return a boto3 S3 client instance.

        Returns:
            botocore.client.S3: Configured S3 client.
        """
        try:
            logger.info("Initializing S3 client instance")
            return boto3.client(
                's3',
                aws_access_key_id=self.access_key,
                aws_secret_access_key=self.secret_key,
                endpoint_url=self.endpoint_url
            )
        except Exception as exp:
            logger.error("Error getting S3 client", exc_info=exp)
            raise

    def download_file(self, bucket_name: str, file_key: str, local_path: str):
        """
        Download a file from S3 to a local path.

        Args:
            bucket_name (str): Name of the S3 bucket.
            file_key (str): Key of the file in S3.
            local_path (str): Destination path on the local system.

        Raises:
            ValueError: If file_key or local_path is empty.
        """
        try:
            if not file_key or not local_path:
                raise ValueError("file_key and local_path are required.")
            logger.info(f"Downloading {file_key=} to {local_path=}")
            self.s3_instance.download_file(bucket_name, file_key, local_path)
        except Exception as e:
            logger.error("Error downloading file", exc_info=e)
            raise

    def upload_file(self, bucket_name: str, path_to_file: str, object_name: str, content_type: str):
        """
        Upload a local file to an S3 bucket.

        Args:
            bucket_name (str): Target S3 bucket.
            path_to_file (str): Local file path.
            object_name (str): Destination object key in S3.
            content_type (str): MIME type of the file.
        """
        logger.info(f"Uploading file: {path_to_file=} to {bucket_name}/{object_name=} with {content_type=}")
        try:
            # Stream the file object so large files are not read into memory.
            with open(path_to_file, 'rb') as file_object:
                self.s3_instance.put_object(
                    Bucket=bucket_name,
                    Key=object_name,
                    Body=file_object,
                    ContentType=content_type
                )
            logger.info("Upload successful")
        except Exception as e:
            logger.error("Error uploading file", exc_info=e)
            raise

    def list_files(self, bucket_name: str, prefix: Optional[str] = "") -> List[str]:
        """
        List files in a bucket under a given prefix.

        Note: returns at most one page (up to 1000 keys) of results; use
        list_bucket_objects() for exhaustive listings.

        Args:
            bucket_name (str): S3 bucket name.
            prefix (Optional[str]): Prefix filter for keys; None is treated as "".

        Returns:
            List[str]: List of file keys.
        """
        try:
            # Coerce None to "" — boto3 rejects a None Prefix.
            response = self.s3_instance.list_objects_v2(Bucket=bucket_name, Prefix=prefix or "")
            files = [item['Key'] for item in response.get('Contents', [])]
            logger.info(f"Found {len(files)} files in {bucket_name}/{prefix}")
            return files
        except Exception as e:
            logger.error("Error listing files", exc_info=e)
            raise

    def list_bucket_objects(
        self, bucket_name: str, filter_prefix: str = "", continuation_token: Optional[str] = None
    ) -> List[dict]:
        """
        List all objects in a bucket under a given prefix, following
        pagination to completion.

        Args:
            bucket_name (str): S3 bucket name.
            filter_prefix (str): Prefix filter for keys.
            continuation_token (Optional[str]): Token to resume a paginated listing.

        Returns:
            List[dict]: List of object metadata dicts as returned by S3.
        """
        logger.info(f"Listing objects in {bucket_name=} with {filter_prefix=}")
        all_objects = []
        try:
            # Iterative pagination (previously recursive, which could recurse
            # deeply on very large buckets).
            while True:
                params = {
                    'Bucket': bucket_name,
                    'Prefix': filter_prefix
                }
                if continuation_token:
                    params['ContinuationToken'] = continuation_token
                response = self.s3_instance.list_objects_v2(**params)
                all_objects.extend(response.get('Contents', []))
                continuation_token = response.get('NextContinuationToken')
                if not continuation_token:
                    return all_objects
        except Exception as e:
            logger.error("Error listing bucket objects", exc_info=e)
            raise

    def delete_file(self, bucket_name: str, file_key: str):
        """
        Delete a file from an S3 bucket.

        Args:
            bucket_name (str): Name of the S3 bucket.
            file_key (str): Key of the file to delete.
        """
        try:
            self.s3_instance.delete_object(Bucket=bucket_name, Key=file_key)
            logger.info(f"Deleted {file_key} from {bucket_name}")
        except Exception as e:
            logger.error("Error deleting file", exc_info=e)
            raise

    def delete_files_by_prefix(self, bucket_name: str, prefix: str):
        """
        Delete all files in a bucket under a given prefix.

        Args:
            bucket_name (str): Name of the S3 bucket.
            prefix (str): Prefix to filter files to delete.
        """
        try:
            objects = self.list_files(bucket_name, prefix)
            if not objects:
                logger.info("No files to delete.")
                return
            # S3 delete_objects accepts at most 1000 keys per request, so
            # delete in batches.
            for start in range(0, len(objects), 1000):
                batch = objects[start:start + 1000]
                delete_payload = {'Objects': [{'Key': key} for key in batch]}
                self.s3_instance.delete_objects(Bucket=bucket_name, Delete=delete_payload)
            logger.info(f"Deleted {len(objects)} files under prefix {prefix}")
        except Exception as e:
            logger.error("Error deleting files by prefix", exc_info=e)
            raise

    def get_latest_file_by_prefix(self, bucket_name: str, prefix: str) -> Optional[str]:
        """
        Get the latest modified file under a specific prefix.

        Args:
            bucket_name (str): Name of the S3 bucket.
            prefix (str): Prefix to filter files.

        Returns:
            Optional[str]: Key of the latest file or None if no files exist.
        """
        try:
            response = self.s3_instance.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
            contents = response.get('Contents', [])
            if not contents:
                logger.info("No files found under prefix.")
                return None
            latest = max(contents, key=lambda x: x['LastModified'])
            logger.info(f"Latest file under {prefix} is {latest['Key']}")
            return latest['Key']
        except Exception as e:
            logger.error("Error getting latest file by prefix", exc_info=e)
            raise

    def file_exists(self, bucket_name: str, file_key: str) -> bool:
        """
        Check if a file exists in an S3 bucket.

        Args:
            bucket_name (str): Name of the S3 bucket.
            file_key (str): Key of the file.

        Returns:
            bool: True if file exists, False otherwise.

        Raises:
            ClientError: For any S3 error other than a 404 (e.g. access denied).
        """
        try:
            self.s3_instance.head_object(Bucket=bucket_name, Key=file_key)
            return True
        except ClientError as e:
            # A 404 simply means "not found"; anything else is a real error.
            if e.response['Error']['Code'] == "404":
                return False
            logger.error("Error checking file existence", exc_info=e)
            raise

    def read_file_content(self, bucket_name: str, file_key: str, encoding: str = "utf-8") -> str:
        """
        Read content of a file from S3.

        Args:
            bucket_name (str): S3 bucket name.
            file_key (str): File key in the bucket.
            encoding (str): Encoding used to decode the object body.

        Returns:
            str: File content as string.
        """
        try:
            obj = self.s3_instance.get_object(Bucket=bucket_name, Key=file_key)
            content = obj['Body'].read().decode(encoding)
            return content
        except Exception as e:
            logger.error("Error reading file content", exc_info=e)
            raise

    def move_file(self, bucket_name: str, file_key: str, new_file_key: str):
        """
        Move a file within an S3 bucket (copy + delete).

        Args:
            bucket_name (str): S3 bucket name.
            file_key (str): Source key.
            new_file_key (str): Destination key.
        """
        try:
            copy_source = {'Bucket': bucket_name, 'Key': file_key}
            self.s3_instance.copy_object(Bucket=bucket_name, CopySource=copy_source, Key=new_file_key)
            # Only delete the source after the copy succeeded.
            self.s3_instance.delete_object(Bucket=bucket_name, Key=file_key)
            logger.info(f"Moved file from {file_key} to {new_file_key} in {bucket_name}")
        except Exception as e:
            logger.error("Error moving file", exc_info=e)
            raise

    def get_latest_n_files(self, bucket: str, prefix: str, count: int) -> List[str]:
        """
        Get the latest `count` files from the given prefix, sorted by
        LastModified (newest first).

        Args:
            bucket (str): Name of the S3 bucket.
            prefix (str): S3 prefix (folder path).
            count (int): Number of latest files to return.

        Returns:
            List[str]: List of S3 keys for the latest files.
        """
        try:
            # Paginate so buckets with >1000 matching keys are fully covered.
            paginator = self.s3_instance.get_paginator("list_objects_v2")
            page_iterator = paginator.paginate(Bucket=bucket, Prefix=prefix)

            all_files = []
            for page in page_iterator:
                all_files.extend(page.get("Contents", []))

            sorted_files = sorted(all_files, key=lambda x: x["LastModified"], reverse=True)
            latest_keys = [obj["Key"] for obj in sorted_files[:count]]

            logger.info("Retrieved latest %d files from prefix %s", count, prefix)
            return latest_keys
        except Exception as e:
            logger.error("Failed to get latest files from S3", exc_info=e)
            raise

    @staticmethod
    def get_object_url(bucket_name: str, object_name: str) -> str:
        """
        Generate a public URL for an object in an S3 bucket.

        Note: assumes the default AWS S3 endpoint; a custom endpoint_url is
        not reflected here.

        Args:
            bucket_name (str): Name of the S3 bucket.
            object_name (str): Key of the object.

        Returns:
            str: Public object URL.
        """
        return f"https://{bucket_name}.s3.amazonaws.com/{object_name}"

    def generate_presigned_url(self, bucket_name: str, object_key: str, expiration: int = 3600) -> str:
        """
        Generate a presigned URL to share an S3 object.

        Args:
            bucket_name (str): Your S3 bucket name.
            object_key (str): The full path of the file in the bucket.
            expiration (int): Time in seconds for the presigned URL to remain
                valid (default: 3600, i.e. 1 hour — the default the original
                docstring promised but never provided).

        Returns:
            str: Presigned URL.
        """
        pre_signed_url = self.s3_instance.generate_presigned_url(
            'get_object',
            Params={'Bucket': bucket_name, 'Key': object_key},
            ExpiresIn=expiration
        )

        return pre_signed_url
File without changes
@@ -0,0 +1,229 @@
1
+ """
2
+ Generic API client module for making external HTTP requests with structured logging and tracing.
3
+
4
+ This wrapper provides reusable methods to send GET, POST, and PUT requests with consistent headers,
5
+ request tracing via `x-request-id`, and tenant context for multitenant systems.
6
+
7
+ It logs key information such as:
8
+ - Service and endpoint URLs
9
+ - Request headers and payloads
10
+ - Response status and body
11
+ - Request duration in milliseconds
12
+ - Exceptions, if any
13
+ """
14
+ import logging
15
+ import os
16
+ import sys
17
+ import time
18
+ from importlib.util import spec_from_file_location, module_from_spec
19
+ from typing import Optional, Dict, Any
20
+
21
+ import requests
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class ApiClient:
    """
    A reusable API client for external service calls with contextual logging
    and tracing support.

    Base URLs are resolved from a ``bw_config.py`` file in the current
    working directory; request tracing is propagated via the ``x-request-id``
    header when a correlation/request id is available.

    Args:
        user (str): The user or system initiating the request.
    """
    # Logical name of this client, used as a service identifier.
    SERVICE_NAME = 'Bw-Essentials'

    def __init__(self, user: str):
        logger.info(f"Initializing ApiClient with user={user}")
        self.name = self.SERVICE_NAME
        self.user = user
        self.headers = {}
        # Seed headers with tracing info (x-request-id) if one is available.
        self._update_headers()
        logger.info("ApiClient initialized")

    def _get_env_var(self, key: str) -> str:
        """
        Fetch a required variable from bw_config.py located in the root
        (current working) directory.

        Args:
            key (str): Name of the config attribute to read.

        Raises:
            FileNotFoundError: If bw_config.py is not found.
            AttributeError: If the requested key is not defined in the config.

        Returns:
            str: The value of the config variable.
        """
        config_path = os.path.join(os.getcwd(), "bw_config.py")

        if not os.path.exists(config_path):
            raise FileNotFoundError("`bw_config.py` file not found in the root directory. "
                                    "Please ensure the config file exists.")

        # Load bw_config.py as a module so plain `KEY = value` assignments
        # in the file become attributes we can read with getattr().
        spec = spec_from_file_location("bw_config", config_path)
        bw_config = module_from_spec(spec)
        sys.modules["bw_config"] = bw_config
        spec.loader.exec_module(bw_config)

        if not hasattr(bw_config, key):
            raise AttributeError(f"`{key}` not found in bw_config.py. Please define it in the config.")

        return getattr(bw_config, key)

    def get_base_url(self, service_name: str) -> str:
        """
        Resolve the base URL for a given service name from bw_config.py
        (the original docstring said "environment variables", but the value
        is read from the config file via _get_env_var).

        Args:
            service_name (str): The logical name of the service; the config
                key looked up is ``{SERVICE_NAME}_BASE_URL`` (upper-cased).

        Returns:
            str: The resolved base URL.
        """
        env_key = f"{service_name.upper()}_BASE_URL"
        return self._get_env_var(env_key)

    def set_tenant_id(self, tenant_id: str) -> None:
        """
        Set the tenant ID in the request headers.

        Args:
            tenant_id (str): The tenant identifier to include in the headers.
        """
        logger.info(f"Setting tenant ID: {tenant_id}")
        self._update_headers({"tenant_id": tenant_id})
        logger.info(f"Updated headers: {self.headers}")

    def set_headers(self, headers: dict) -> None:
        """
        Merge provided headers into the client's request headers.

        Args:
            headers (dict): Dictionary of headers to add or update.
        """
        logger.info(f"Updating headers with: {headers}")
        self._update_headers(headers)
        logger.info(f"Headers after update: {self.headers}")

    def _update_headers(self, new_headers: Optional[dict] = None) -> None:
        """
        Update the client's headers with new entries and tracing information.

        Looks for a request/correlation id from asgi-correlation-id (FastAPI)
        first, then django-log-request-id, and propagates it as the
        ``x-request-id`` header when found. Both libraries are optional.

        Args:
            new_headers (Optional[dict]): Headers to merge; defaults to {}.
        """
        new_headers = new_headers or {}

        try:
            from asgi_correlation_id import correlation_id
            request_id = correlation_id.get()
        except ImportError:
            request_id = None

        if not request_id:
            try:
                from log_request_id import local
                request_id = getattr(local, 'request_id', None)
            except ImportError:
                request_id = None

        if request_id:
            new_headers["x-request-id"] = request_id
            logger.info(f"Using request ID: {request_id}")
        else:
            logger.info("No request ID found to add to headers")

        self.headers.update(new_headers)

    def _log_response(self, method: str, url: str, status_code: int, elapsed_time_ms: float, response_body: Any):
        """
        Log a single response line: method, URL, status, latency and body.
        """
        logger.info(
            f"{method.upper()} {url} | Status: {status_code} | Time: {elapsed_time_ms:.2f}ms "
            f"| Response: {response_body}"
        )

    def _request(self, method: str, url: str, endpoint: str, **kwargs) -> Any:
        """
        Generic method to send HTTP requests and log key metrics.

        Args:
            method (str): HTTP method - GET, POST, PUT, etc.
            url (str): Base service URL.
            endpoint (str): API endpoint path.
            kwargs: Additional keyword arguments for `requests.request`.

        Returns:
            Any: Parsed JSON response.

        Raises:
            requests.HTTPError: If the response status is 4xx/5xx.
            Exception: If the request fails or the body is not valid JSON.
        """
        formatted_url = f"{url.rstrip('/')}/{endpoint.lstrip('/')}"
        logger.info(f"{method.upper()} {formatted_url} | Headers: {self.headers} | Params: {kwargs.get('params')}")

        start = time.time()
        try:
            response = requests.request(method, formatted_url, headers=self.headers, **kwargs)
            elapsed_time_ms = (time.time() - start) * 1000
            response.raise_for_status()
            json_data = response.json()
            self._log_response(method, formatted_url, response.status_code, elapsed_time_ms, json_data)
            return json_data
        except Exception as exc:
            elapsed_time_ms = (time.time() - start) * 1000
            logger.error(f"{method.upper()} {formatted_url} failed after {elapsed_time_ms:.2f}ms")
            logger.exception(exc)
            raise

    def _get(self, url: str, endpoint: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """
        Send a GET request to the given endpoint.

        Args:
            url (str): Base URL.
            endpoint (str): API path.
            params (Optional[Dict[str, Any]]): Query parameters.

        Returns:
            Any: JSON response.
        """
        return self._request("get", url, endpoint, params=params)

    def _post(
        self,
        url: str,
        endpoint: str,
        data: Optional[Dict[str, Any]] = None,
        json: Optional[Dict[str, Any]] = None,
        params: Optional[Dict[str, Any]] = None,
    ) -> Any:
        """
        Send a POST request to the given endpoint.

        Args:
            url (str): Base URL.
            endpoint (str): API path.
            data (Optional[Dict[str, Any]]): Form-encoded body.
            json (Optional[Dict[str, Any]]): JSON body.
            params (Optional[Dict[str, Any]]): Query parameters.

        Returns:
            Any: JSON response.
        """
        return self._request("post", url, endpoint, data=data, json=json, params=params)

    def _put(
        self,
        url: str,
        endpoint: str,
        data: Optional[Dict[str, Any]] = None,
        json: Optional[Dict[str, Any]] = None,
        params: Optional[Dict[str, Any]] = None,
    ) -> Any:
        """
        Send a PUT request to the given endpoint.

        Args:
            url (str): Base URL.
            endpoint (str): API path.
            data (Optional[Dict[str, Any]]): Form-encoded body.
            json (Optional[Dict[str, Any]]): JSON body.
            params (Optional[Dict[str, Any]]): Query parameters.

        Returns:
            Any: JSON response.
        """
        return self._request("put", url, endpoint, data=data, json=json, params=params)