bw-essentials-core 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bw-essentials-core might be problematic. Click here for more details.

File without changes
File without changes
@@ -0,0 +1,19 @@
1
+ from enum import Enum
2
+
3
+
4
class Services(Enum):
    """
    Identifiers of the services known to this package.

    Each member's value is the string name of the corresponding service.
    """

    REBALANCE_BUSINESS = "Rebalance_Business"
    REBALANCE = "Rebalance"
    MASTER_DATA = "Master_Data"
    MARKET_PRICER = "Market_Pricer"
    BROKER = "Broker"
    USER_PORTFOLIO = "User_Portfolio"
    TRADE_PLACEMENT = "Trade_Placement"
    # The member name is CONTENT, but the service string is "Portfolio_Content".
    CONTENT = "Portfolio_Content"
    NOTIFICATION = "Notification"
    USER_REPORTING = "User_Reporting"
    PAYMENT = "Payment"
    MODEL_PORTFOLIO = "Model_Portfolio"
File without changes
@@ -0,0 +1,282 @@
1
+ """
2
+ Module for interacting with LakeFS and S3.
3
+
4
+ This module defines the `DataLoch` class, which provides a high-level interface for managing
5
+ version-controlled data in LakeFS using an underlying S3-compatible object store.
6
+
7
+ Key Features:
8
+ - Upload and download files to/from LakeFS.
9
+ - Sync entire directories from LakeFS to local and vice versa.
10
+ - Check file existence in LakeFS.
11
+ - Commit changes to branches in LakeFS.
12
+ - Retrieve the latest or N latest files from a given LakeFS path.
13
+ - Delete LakeFS directory paths (prefixes).
14
+
15
+ Requires:
16
+ - `lakefs` client
17
+ - `S3Utils` from `bw_essentials.s3_utils`
18
+
19
+ Intended for automation of LakeFS operations in data pipelines or services.
20
+ """
21
+
22
+ import concurrent
23
+ import logging
24
+ import os
25
+ import sys
26
+ from concurrent.futures import ThreadPoolExecutor
27
+ from datetime import datetime
28
+ from importlib.util import spec_from_file_location, module_from_spec
29
+ from pathlib import Path
30
+
31
+ from lakefs.client import Client
32
+ from bw_essentials.s3_utils.s3_utils import S3Utils
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
class DataLoch:
    """
    High-level helper for versioned data operations on LakeFS over S3.

    Objects are transferred through an ``S3Utils`` instance and addressed as
    ``<branch>/<path>`` inside the bucket named after the repository. Commits
    are made through the LakeFS client.
    """

    def __init__(self, access_key, secret_key, host):
        """
        Set up the LakeFS client and the S3 helper.

        Connection settings are read from ``bw_config.py`` in the current
        working directory (``DATA_LAKE_ACCESS_KEY``, ``DATA_LAKE_SECRET_KEY``
        and ``DATA_LAKE_HOST_URL``).

        Args:
            access_key (str): Currently ignored; see the note below.
            secret_key (str): Currently ignored; see the note below.
            host (str): Currently ignored; see the note below.

        Raises:
            FileNotFoundError: If ``bw_config.py`` is missing.
            AttributeError: If a required setting is missing from the config.
        """
        logger.info("Initializing LakeFS instance")
        # NOTE(review): the constructor arguments are accepted for interface
        # compatibility but not used; the values from bw_config.py always win.
        # Confirm with callers before changing this precedence.
        self.access_key = self._get_env_var("DATA_LAKE_ACCESS_KEY")
        self.secret_key = self._get_env_var("DATA_LAKE_SECRET_KEY")
        self.host = self._get_env_var("DATA_LAKE_HOST_URL")
        self.client = self._get_lakefs_client()
        self.s3 = S3Utils()

    def _get_env_var(self, key: str) -> str:
        """
        Fetch a required variable from ``bw_config.py``.

        The file is looked up in the current working directory and is
        re-executed on every call; the loaded module is also registered as
        ``sys.modules["bw_config"]``.

        Args:
            key (str): Name of the module-level variable to read.

        Raises:
            FileNotFoundError: If bw_config.py is not found.
            AttributeError: If the requested key is not defined in the config.

        Returns:
            str: The value of the config variable.
        """
        config_path = os.path.join(os.getcwd(), "bw_config.py")

        if not os.path.exists(config_path):
            raise FileNotFoundError("`bw_config.py` file not found in the root directory. "
                                    "Please ensure the config file exists.")

        spec = spec_from_file_location("bw_config", config_path)
        bw_config = module_from_spec(spec)
        sys.modules["bw_config"] = bw_config
        spec.loader.exec_module(bw_config)

        if not hasattr(bw_config, key):
            raise AttributeError(f"`{key}` not found in bw_config.py. Please define it in the config.")

        return getattr(bw_config, key)

    def _get_lakefs_client(self):
        """
        Build the LakeFS client from the configured host and credentials.

        Returns:
            lakefs.client.Client: Client created with ``self.host``,
            ``self.access_key`` and ``self.secret_key``.
        """
        logger.info("Initializing LakeFS client")
        return Client(host=self.host, username=self.access_key, password=self.secret_key)

    def _get_branch(self, repository, branch):
        """
        Return the LakeFS branch object for ``repository``/``branch``.

        Args:
            repository (str): Repository name.
            branch (str): Branch name.

        Returns:
            lakefs.Branch: LakeFS branch object.
        """
        # Imported lazily so the S3-only methods work without touching it.
        from lakefs import Repository
        return Repository(repository, client=self.client).branch(branch_id=branch)

    def _get_meta_data(self, repository, branch):
        """
        Return the metadata dictionary attached to a commit.

        Args:
            repository (str): Repository name.
            branch (str): Branch name.

        Returns:
            dict: ``source`` (current working directory), ``branch``,
            ``repository`` and ``timestamp`` (local time, as a string).
        """
        return {
            'source': os.getcwd(),
            'branch': branch,
            'repository': repository,
            'timestamp': str(datetime.now())
        }

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of ``path`` if it has one."""
        parent = os.path.dirname(path)
        # A bare file name has an empty dirname, and os.makedirs("") raises.
        if parent:
            os.makedirs(parent, exist_ok=True)

    @staticmethod
    def _run_in_pool(func, jobs, max_workers):
        """
        Run ``func(*job)`` for every job on a thread pool and surface failures.

        All jobs are allowed to finish. Every failure is logged, and the first
        one (in submission order) is re-raised afterwards so that callers never
        treat a partially failed transfer as a success.

        Args:
            func (callable): Function to call for each job.
            jobs (list[tuple]): Positional-argument tuples, one per call.
            max_workers (int): Size of the thread pool.
        """
        if not jobs:
            return
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(func, *job) for job in jobs]
        # Leaving the ``with`` block waits for every submitted task, so the
        # futures below are already done and ``exception()`` does not block.
        first_error = None
        for job, future in zip(jobs, futures):
            error = future.exception()
            if error is None:
                continue
            logger.error("Task failed for arguments %r: %s", job, error)
            if first_error is None:
                first_error = error
        if first_error is not None:
            raise first_error

    def upload_data(self, repository, branch, local_file_path, file_name, commit=True):
        """
        Upload a single file to a LakeFS branch and optionally commit the change.

        Args:
            repository (str): LakeFS repository name.
            branch (str): Target branch in the repository.
            local_file_path (str): Full local path of the file to upload.
            file_name (str): File name to use in LakeFS.
            commit (bool): Whether to commit the change after upload. Defaults to True.
        """
        logger.info(f"Uploading file {file_name} from {local_file_path}")
        # The branch object is only needed for committing, so it is not built
        # for the per-file uploads issued by ``upload_dir``.
        branch_obj = self._get_branch(repository, branch) if commit else None
        self.s3.upload_file(repository, local_file_path, f"{branch}/{file_name}", "application/octet-stream")
        if commit:
            commit_response = branch_obj.commit(
                message=f"{file_name=} uploaded to data lake.",
                metadata=self._get_meta_data(repository, branch)
            )
            logger.info(f"Changes committed: {commit_response=}")

    def download_data(self, repository, branch, local_file_path, file_name):
        """
        Download a single file from a LakeFS branch to a local path.

        Args:
            repository (str): LakeFS repository name.
            branch (str): Branch in the repository.
            local_file_path (str): Full local path to save the file.
            file_name (str): Name of the file in LakeFS.
        """
        logger.info(f"Downloading file {file_name} to {local_file_path}")
        self.s3.download_file(repository, f"{branch}/{file_name}", local_file_path)

    def file_exists(self, repository, branch, filename):
        """
        Check whether a file exists in a LakeFS repository branch.

        Args:
            repository (str): Repository name.
            branch (str): Branch name.
            filename (str): File name to check.

        Returns:
            bool: True if the ``head_object`` call succeeds, False otherwise.
        """
        try:
            self.s3.s3_instance.head_object(Bucket=repository, Key=f"{branch}/{filename}")
            return True
        except Exception:
            # Every failure, not only "object missing", is reported as False.
            return False

    def sync_dir(self, repository, branch, server_path, local_path=None):
        """
        Sync a directory from LakeFS (S3) to the local file system.

        Objects whose local target already exists and keys ending in ``/`` are
        skipped. The remaining objects are downloaded in parallel.

        Args:
            repository (str): Repository name.
            branch (str): Branch name.
            server_path (str): Path under the branch to sync from.
            local_path (str, optional): Local destination directory. Defaults to current dir.

        Raises:
            Exception: The first download error, after all downloads finished.
        """
        logger.info(f"Syncing directory from {server_path} to {local_path}")
        prefix = f"{branch}/{server_path}"
        jobs = []
        for obj in self.s3.list_bucket_objects(repository, prefix):
            key = obj['Key']
            rel_path = os.path.relpath(key, prefix)
            target = os.path.join(local_path, rel_path) if local_path else rel_path
            if key.endswith('/') or os.path.exists(target):
                continue
            self._ensure_parent_dir(target)
            jobs.append((repository, key, target))
        self._run_in_pool(self.s3.download_file, jobs, max_workers=4)
        logger.info("Directory sync complete")

    def upload_dir(self, repository, branch, local_path, server_path):
        """
        Upload all files from a local directory to LakeFS and commit them.

        Only regular files directly inside ``local_path`` are uploaded;
        sub-directories are not traversed. A single commit is made after all
        uploads have succeeded.

        Args:
            repository (str): Repository name.
            branch (str): Target branch.
            local_path (str): Local directory path.
            server_path (str): Target path in LakeFS under the branch.

        Raises:
            Exception: The first upload error; nothing is committed in that case.
        """
        logger.info(f"Uploading directory {local_path} to {server_path}")
        jobs = [
            (repository, branch, str(file), f"{server_path}/{file.name}", False)
            for file in Path(local_path).iterdir() if file.is_file()
        ]
        self._run_in_pool(self.upload_data, jobs, max_workers=8)
        commit_response = self._get_branch(repository, branch).commit(
            message=f"{local_path} uploaded to data lake.",
            metadata=self._get_meta_data(repository, branch)
        )
        logger.info(f"Directory uploaded and committed: {commit_response}")

    def get_latest_file(self, repository, branch, server_path, local_file_path):
        """
        Get and download the latest modified file from a LakeFS path.

        Nothing is downloaded when no key is found for the prefix.

        Args:
            repository (str): Repository name.
            branch (str): Branch name.
            server_path (str): Path in LakeFS under the branch.
            local_file_path (str): Local destination path to save the file.
        """
        logger.info(f"Fetching latest file from {server_path}")
        prefix = f"{branch}/{server_path}"
        key = self.s3.get_latest_file_by_prefix(repository, prefix)
        if key:
            self.s3.download_file(repository, key, local_file_path)

    def get_latest_n_files(self, repository, branch, server_path, local_file_path, count=1):
        """
        Download the latest N files from a given LakeFS path.

        Files whose local target already exists are skipped.

        Args:
            repository (str): Repository name.
            branch (str): Branch name.
            server_path (str): Path in LakeFS under the branch.
            local_file_path (str): Local directory to store downloaded files.
            count (int, optional): Number of recent files to download. Defaults to 1.

        Raises:
            Exception: The first download error, after all downloads finished.
        """
        logger.info(f"Fetching latest {count} files from {server_path}")
        prefix = f"{branch}/{server_path}"
        keys = self.s3.get_latest_n_files(repository, prefix, count=count)
        # Create the destination directory itself, not just its parent.
        if local_file_path:
            os.makedirs(local_file_path, exist_ok=True)
        jobs = []
        for key in keys:
            target = os.path.join(local_file_path, os.path.relpath(key, prefix))
            if os.path.exists(target):
                continue
            # Keys nested below the prefix need their sub-directories too.
            self._ensure_parent_dir(target)
            jobs.append((repository, key, target))
        self._run_in_pool(self.s3.download_file, jobs, max_workers=4)
        logger.info("Downloaded latest files")

    def delete_dir(self, repository, branch, server_path):
        """
        Delete all files under a given path (prefix) in a LakeFS branch.

        Args:
            repository (str): Repository name.
            branch (str): Branch name.
            server_path (str): Path in LakeFS to delete under the branch.
        """
        logger.info(f"Deleting directory {server_path}")
        prefix = f"{branch}/{server_path}"
        self.s3.delete_files_by_prefix(repository, prefix)
        logger.info(f"Deleted all files under prefix {prefix}")
File without changes
@@ -0,0 +1,243 @@
1
+ """
2
+ email_client.py
3
+
4
+ This module defines the EmailClient class for sending emails using SMTP. It provides
5
+ methods to send emails with or without file attachments and supports multiple recipients
6
+ and CC addresses. The email body can be plain text or HTML.
7
+
8
+ Usage:
9
+ # SMTP_HOST, SMTP_PORT, SMTP_USERNAME and SMTP_PASSWORD are read from
10
+ # bw_config.py in the current working directory, so only the sender
11
+ # details are passed to the constructor.
12
+
13
+ client = EmailClient(
14
+ sender_email="from@example.com",
15
+ sender_name="My Service"
16
+ )
17
+
18
+ client.send_email_without_attachment(
19
+ to_addresses=["to@example.com"],
20
+ cc_addresses=["cc@example.com"],
21
+ subject="Test Email",
22
+ body="Hello, this is a test email!"
23
+ )
24
+
25
+ client.send_email_with_attachment(
26
+ to_addresses="to@example.com",
27
+ cc_addresses=None,
28
+ subject="Report",
29
+ body="<h1>Monthly Report</h1>",
30
+ attachment_path="/path/to/report.csv"
31
+ )
32
+ """
33
+
34
+ import logging
35
+ import os
36
+ import smtplib
37
+ import sys
38
+ from importlib.util import spec_from_file_location, module_from_spec
39
+ from typing import Optional, List, Union
40
+ from email.mime.multipart import MIMEMultipart
41
+ from email.mime.text import MIMEText
42
+ from email.mime.base import MIMEBase
43
+ from email import encoders
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
class EmailClient:
    """
    A reusable email client for sending emails using SMTP.

    SMTP connection settings are loaded from ``bw_config.py`` in the current
    working directory; only the sender identity is passed to the constructor.

    Attributes:
        smtp_host: Value of ``SMTP_HOST`` from bw_config.py.
        smtp_port: Value of ``SMTP_PORT`` from bw_config.py.
        smtp_username: Value of ``SMTP_USERNAME`` from bw_config.py.
        smtp_password: Value of ``SMTP_PASSWORD`` from bw_config.py.
        sender_email (str): Email address used as the sender.
        sender_name (str): Display name for the sender.
    """

    def __init__(
        self,
        sender_email: str,
        sender_name: str,
    ):
        """
        Initialize the EmailClient.

        Args:
            sender_email (str): Sender's email address.
            sender_name (str): Display name to show in "From" field.

        Raises:
            FileNotFoundError: If ``bw_config.py`` is missing.
            AttributeError: If an SMTP setting is missing from the config.
        """
        self.smtp_host = self._get_env_var("SMTP_HOST")
        self.smtp_port = self._get_env_var("SMTP_PORT")
        self.smtp_username = self._get_env_var("SMTP_USERNAME")
        self.smtp_password = self._get_env_var("SMTP_PASSWORD")
        self.sender_email = sender_email
        self.sender_name = sender_name
        logger.info("EmailClient initialized with SMTP host: %s", self.smtp_host)

    def _get_env_var(self, key: str) -> str:
        """
        Fetch a required variable from ``bw_config.py``.

        The file is looked up in the current working directory and is
        re-executed on every call; the loaded module is also registered as
        ``sys.modules["bw_config"]``.

        Args:
            key (str): Name of the module-level variable to read.

        Raises:
            FileNotFoundError: If bw_config.py is not found.
            AttributeError: If the requested key is not defined in the config.

        Returns:
            str: The value of the config variable.
        """
        config_path = os.path.join(os.getcwd(), "bw_config.py")

        if not os.path.exists(config_path):
            raise FileNotFoundError("`bw_config.py` file not found in the root directory. "
                                    "Please ensure the config file exists.")

        spec = spec_from_file_location("bw_config", config_path)
        bw_config = module_from_spec(spec)
        sys.modules["bw_config"] = bw_config
        spec.loader.exec_module(bw_config)

        if not hasattr(bw_config, key):
            raise AttributeError(f"`{key}` not found in bw_config.py. Please define it in the config.")

        return getattr(bw_config, key)

    def send_email_without_attachment(
        self,
        to_addresses: Union[str, List[str]],
        cc_addresses: Union[str, List[str], None],
        subject: str,
        body: str,
        is_html: bool = True,
    ):
        """
        Send an email without any attachments.

        Args:
            to_addresses (Union[str, List[str]]): One or more recipient email addresses.
            cc_addresses (Union[str, List[str], None]): One or more CC addresses (optional).
            subject (str): Email subject line.
            body (str): Email body content.
            is_html (bool): If True, body is interpreted as HTML; otherwise, plain text.
        """
        logger.info("Preparing to send email without attachment | Subject: %s", subject)
        self._send_email(to_addresses, cc_addresses, subject, body, None, is_html)

    def send_email_with_attachment(
        self,
        to_addresses: Union[str, List[str]],
        cc_addresses: Union[str, List[str], None],
        subject: str,
        body: str,
        attachment_path: str,
        is_html: bool = True,
    ):
        """
        Send an email with a file attachment.

        If the attachment file does not exist, a warning is logged and the
        email is sent without it.

        Args:
            to_addresses (Union[str, List[str]]): One or more recipient email addresses.
            cc_addresses (Union[str, List[str], None]): One or more CC addresses (optional).
            subject (str): Email subject line.
            body (str): Email body content.
            attachment_path (str): Full path to the file to attach.
            is_html (bool): If True, body is interpreted as HTML; otherwise, plain text.
        """
        logger.info(
            "Preparing to send email with attachment | Subject: %s | Attachment: %s",
            subject, attachment_path
        )
        self._send_email(to_addresses, cc_addresses, subject, body, attachment_path, is_html)

    def _send_email(
        self,
        to_addresses: Union[str, List[str]],
        cc_addresses: Union[str, List[str], None],
        subject: str,
        body: str,
        attachment_path: Optional[str],
        is_html: bool,
    ):
        """
        Internal helper method to construct and send email messages.

        Args:
            to_addresses (Union[str, List[str]]): One or more recipient email addresses.
            cc_addresses (Union[str, List[str], None]): CC addresses.
            subject (str): Subject of the email.
            body (str): Email body content.
            attachment_path (Optional[str]): File path for the attachment, if any.
            is_html (bool): True if the body is HTML-formatted.

        Raises:
            Exception: Any error raised while reading the attachment or
                talking to the SMTP server is logged and re-raised.
        """
        msg = self._build_message(to_addresses, cc_addresses, subject, body, is_html)
        if attachment_path:
            self._attach_file(msg, attachment_path)

        recipients = self._get_recipient_list(to_addresses, cc_addresses)
        try:
            with smtplib.SMTP(self.smtp_host, self.smtp_port) as server:
                server.starttls()
                server.login(self.smtp_username, self.smtp_password)
                server.sendmail(self.sender_email, recipients, msg.as_string())
            logger.info("Email sent to %s with CC %s", to_addresses, cc_addresses)
        except Exception:
            logger.exception("Failed to send email")
            # Bare raise keeps the original traceback intact.
            raise

    def _build_message(self, to_addresses, cc_addresses, subject, body, is_html):
        """Build the multipart message with its headers and text body."""
        msg = MIMEMultipart()
        msg['From'] = f'"{self.sender_name}" <{self.sender_email}>'
        msg['To'] = self._format_addresses(to_addresses)
        # Only add a Cc header when there is someone to CC, so that no empty
        # "Cc:" header ends up in the message.
        if cc_addresses:
            msg['Cc'] = self._format_addresses(cc_addresses)
        msg['Subject'] = subject

        msg.attach(MIMEText(body, 'html' if is_html else 'plain'))
        logger.debug("Email headers and body constructed")
        return msg

    def _attach_file(self, msg, attachment_path):
        """Attach the file at ``attachment_path`` to ``msg`` as a base64 part."""
        if not os.path.exists(attachment_path):
            # Best effort: the email is still sent, just without the file.
            logger.warning("Attachment file not found: %s", attachment_path)
            return
        try:
            with open(attachment_path, 'rb') as attachment:
                part = MIMEBase('application', 'octet-stream')
                part.set_payload(attachment.read())
            encoders.encode_base64(part)
            part.add_header('Content-Disposition', 'attachment',
                            filename=os.path.basename(attachment_path))
            msg.attach(part)
            logger.info("Attachment added: %s", os.path.basename(attachment_path))
        except Exception:
            logger.exception("Failed to read attachment file")
            raise

    def _format_addresses(self, addresses: Union[str, List[str]]) -> str:
        """
        Convert a string or list of email addresses to a comma-separated string.

        Args:
            addresses (Union[str, List[str]]): Email addresses. Tuples are
                accepted as well; any other value is returned unchanged.

        Returns:
            str: Comma-separated email addresses.
        """
        if isinstance(addresses, (list, tuple)):
            return ", ".join(addresses)
        return addresses

    def _get_recipient_list(
        self,
        to_addresses: Union[str, List[str]],
        cc_addresses: Union[str, List[str], None]
    ) -> List[str]:
        """
        Merge TO and CC addresses into a single list for email sending.

        Args:
            to_addresses (Union[str, List[str]]): Main recipients.
            cc_addresses (Union[str, List[str], None]): CC recipients.

        Returns:
            List[str]: List of all email recipients (a new list; the inputs
            are not modified).
        """
        if isinstance(to_addresses, (list, tuple)):
            to_list = list(to_addresses)
        else:
            to_list = [to_addresses]

        if isinstance(cc_addresses, (list, tuple)):
            cc_list = list(cc_addresses)
        elif cc_addresses:
            cc_list = [cc_addresses]
        else:
            cc_list = []

        return to_list + cc_list
File without changes