driftmon 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
driftmon/__init__.py ADDED
File without changes
@@ -0,0 +1,82 @@
1
+ import os
2
+ import smtplib
3
+ import time
4
+ from datetime import datetime
5
+ from email.mime.multipart import MIMEMultipart
6
+ from email.mime.text import MIMEText
7
+
8
+ from jinja2 import Environment, FileSystemLoader, select_autoescape
9
+
10
_EMAIL_SUBJECT = "Data Drift Alert"
_RETRIES = 5  # maximum number of delivery attempts
_BASE = 1  # base delay in seconds; attempt k waits _BASE * k before the next try

# SMTP endpoints keyed by the (lower-cased) domain of the sender address.
# NOTE(review): outlook.com lists 587 for "ssl_port" as well; an implicit-TLS
# connection (smtplib.SMTP_SSL) against a STARTTLS port is likely to fail --
# confirm the intended port before relying on use_ssl=True with Outlook.
_SMTP_SETTINGS = {
    "gmail.com": {"server": "smtp.gmail.com", "ssl_port": 465, "tls_port": 587},
    "yahoo.com": {"server": "smtp.mail.yahoo.com", "ssl_port": 465, "tls_port": 587},
    "outlook.com": {"server": "smtp.office365.com", "ssl_port": 587, "tls_port": 587},
}


class Email:
    """
    Email alert handler for sending data drift notifications.

    Args:
        sender (str): Sender address; its domain selects the SMTP server.
        password (str): Password used to log in as ``sender``.
        receiver (str): Recipient address.
        drift_report (str): The drift report rendered into the HTML template.
        use_ssl (bool): Connect with ``SMTP_SSL`` when True, otherwise use
            plain SMTP upgraded with STARTTLS.

    Raises:
        ValueError: If the sender's domain is not listed in ``_SMTP_SETTINGS``.

    Methods:
        send_email(subject):
            Renders the ``drift_alert.html`` template and sends it.
    """

    def __init__(
        self,
        sender: str,
        password: str,
        receiver: str,
        drift_report: str,
        use_ssl: bool = False,
    ):
        self.sender = sender
        self.password = password
        self.receiver = receiver
        self.drift_report = drift_report
        self.use_ssl = use_ssl

        # Resolve the provider first so an unsupported sender fails before any
        # template machinery is set up. Domains are case-insensitive, so
        # "User@Gmail.com" must resolve exactly like "user@gmail.com".
        domain = sender.split("@")[-1].strip().lower()
        settings = _SMTP_SETTINGS.get(domain)
        if not settings:
            raise ValueError(f"Unsupported email provider: {domain}")
        self.smtp_server = settings["server"]
        self.smtp_port = settings["ssl_port"] if use_ssl else settings["tls_port"]

        # Templates are loaded from the "templates" directory next to this module.
        template_dir = os.path.join(os.path.dirname(__file__), "templates")
        self.env = Environment(
            loader=FileSystemLoader(template_dir), autoescape=select_autoescape(["html"])
        )

    def _deliver(self, payload):
        """Open one SMTP session, log in, and send the serialised message."""
        if self.use_ssl:
            with smtplib.SMTP_SSL(self.smtp_server, self.smtp_port) as server:
                server.login(self.sender, self.password)
                server.sendmail(self.sender, self.receiver, payload)
        else:
            with smtplib.SMTP(self.smtp_server, self.smtp_port) as server:
                server.ehlo()
                server.starttls()
                server.login(self.sender, self.password)
                server.sendmail(self.sender, self.receiver, payload)

    def send_email(self, subject=_EMAIL_SUBJECT):
        """
        Render the drift alert and send it, retrying on failure.

        Up to ``_RETRIES`` attempts are made, waiting ``_BASE * attempt``
        seconds between consecutive attempts. The outcome is reported with
        ``print``; delivery failures are not raised to the caller.

        Args:
            subject: Subject line of the message.

        Raises:
            ValueError: If ``drift_report`` is None.
        """
        if self.drift_report is None:
            raise ValueError("Drift report must be provided!")

        template = self.env.get_template("drift_alert.html")
        html_body = template.render(
            timestamp=datetime.now().isoformat(), drift_report=self.drift_report
        )

        message = MIMEMultipart("alternative")
        message["From"] = self.sender
        message["To"] = self.receiver
        message["Subject"] = subject
        message.attach(MIMEText(html_body, "html"))
        # Serialise once; the payload is identical for every attempt.
        payload = message.as_string()

        for attempt in range(1, _RETRIES + 1):
            try:
                self._deliver(payload)
            except Exception as e:  # best-effort alerting: report and retry
                print(f"Attempt {attempt} failed: {e}")
                # No point in waiting once the last attempt has failed.
                if attempt < _RETRIES:
                    time.sleep(_BASE * attempt)
            else:
                print(f"✅ Email successfully sent to {self.receiver}")
                return
        print(f"Failed to send email after {_RETRIES} retries")
@@ -0,0 +1,50 @@
1
+ import time
2
+ from datetime import datetime
3
+
4
+ from slack_sdk import WebClient
5
+ from slack_sdk.errors import SlackApiError
6
+
7
_RETRIES = 5  # maximum number of delivery attempts
_BASE = 1  # base delay in seconds; attempt k waits _BASE * k before the next try


class Slack:
    """
    Slack alert handler for sending data drift notifications.

    Args:
        token (str): Slack API token.
        channel (str): Slack channel ID or name.
        drift_report (str): The drift report to send as a message.

    Methods:
        send_notification():
            Sends a notification message to Slack.
    """

    def __init__(self, token: str, channel: str, drift_report: str):
        self.token = token
        self.channel = channel
        self.drift_report = drift_report

    def send_notification(self):
        """
        Post the drift report to the configured channel, retrying on API errors.

        Up to ``_RETRIES`` attempts are made, waiting ``_BASE * attempt``
        seconds between consecutive attempts. Only ``SlackApiError`` is
        retried; any other exception propagates to the caller. The outcome is
        reported with ``print``.
        """
        client = WebClient(token=self.token)

        # Built once so every retry posts the same text and the timestamp
        # reflects when the alert was raised, not when a retry happened.
        message = (
            f"*Data Drift Alert*\n"
            f"Timestamp: `{datetime.now().isoformat()}`\n"
            f"Detected drift in the following tables:\n"
            f"```{self.drift_report}```"
        )

        for attempt in range(1, _RETRIES + 1):
            try:
                client.chat_postMessage(channel=self.channel, text=message)
            except SlackApiError as e:
                print(f"Error sending message: {e.response['error']}")
                # No point in waiting once the last attempt has failed.
                if attempt < _RETRIES:
                    time.sleep(_BASE * attempt)
            else:
                print("✅ Slack notification sent")
                return
        print(f"Failed to send Slack notification after {_RETRIES} retries")
driftmon/cli.py ADDED
@@ -0,0 +1,303 @@
1
+ import subprocess
2
+ import sys
3
+
4
+ import typer
5
+ from prompt_toolkit import PromptSession
6
+ from rich import print
7
+ from rich.progress import Progress, SpinnerColumn, TextColumn
8
+ from typer import Typer
9
+
10
+ from driftmon.config import load_conn_params, save_conn_params
11
+ from driftmon.utils import TimedConnection
12
+
13
+ app = Typer()
14
+ timed_conn = None
15
+
16
+ email_client = None
17
+ slack_client = None
18
+
19
+
20
def create_connector_and_params(params):
    """
    Instantiate the connector matching ``params["type"]``.

    Supported connectors include MySQL, PostgreSQL, BigQuery, and Snowflake.
    Each connector module is imported lazily, only for the selected type.

    Args:
        params: Mapping of connection settings. ``"type"`` selects the
            connector; the remaining keys read depend on that type.

    Returns:
        The connector instance for the requested type.

    Raises:
        ValueError: If ``"type"`` is missing or not one of the supported values.
        KeyError: If a required setting for the selected type is absent.
    """
    # .get() so a config without "type" yields the same clear error as an
    # unknown type instead of a bare KeyError.
    conn_type = params.get("type")

    if conn_type == "bigquery":
        from driftmon.connector.bigquery_connector import BigQueryConn

        return BigQueryConn(
            project=params["project"], credentials_path=params["credentials_path"]
        )

    if conn_type == "snowflake":
        from driftmon.connector.snowflake_connector import SnowflakeConn

        return SnowflakeConn(
            user=params["user"],
            password=params["password"],
            account=params["account"],
            database=params["database"],
            warehouse=params["warehouse"],
            schema=params.get("schema"),
            role=params.get("role"),
        )

    if conn_type == "postgres":
        from driftmon.connector.postgres_connector import PostgresConn

        return PostgresConn(
            host=params["host"],
            port=params["port"],
            user=params["user"],
            password=params["password"],
            database=params["database"],
        )

    if conn_type == "mysql":
        from driftmon.connector.mysql_connector import MySQLConn

        return MySQLConn(
            host=params["host"],
            port=params["port"],
            user=params["user"],
            password=params["password"],
            database=params["database"],
        )

    raise ValueError(f"Unsupported connection type: {conn_type}")
71
+
72
+
73
def setup_alert_clients(tables=None):
    """
    Configure the email and Slack alert clients from the saved configuration.

    Rebinds the module-level ``email_client`` and ``slack_client``. A channel
    that is not enabled in the configuration is reset to ``None`` so a client
    from an earlier call is never reused by mistake.

    Args:
        tables: Table names to report. When empty or None, the ``"tables"``
            entry of the saved configuration is used instead.
    """
    from driftmon.alerts.email_alert import Email
    from driftmon.alerts.slack_alert import Slack

    global email_client, slack_client
    alert_config = load_conn_params()
    email_settings = alert_config.get("email", {})
    slack_settings = alert_config.get("slack", {})

    # Both alert classes take the report as one string via ``drift_report``.
    # NOTE(review): assumes one table name per line is the intended report
    # format -- confirm against the email template and detection output.
    monitored = tables or alert_config.get("tables", [])
    drift_report = "\n".join(str(table) for table in monitored)

    # Reset first: a disabled channel must not keep a stale client around.
    email_client = None
    slack_client = None

    if email_settings.get("enabled"):
        # Keyword names follow Email.__init__ (sender, password, receiver,
        # drift_report); any other spelling raises TypeError.
        email_client = Email(
            sender=email_settings["sender"],
            password=email_settings["password"],
            receiver=email_settings["recipient"],
            drift_report=drift_report,
        )
    if slack_settings.get("enabled"):
        slack_client = Slack(
            token=slack_settings["token"],
            channel=slack_settings["channel"],
            drift_report=drift_report,
        )
93
+
94
+
95
@app.command()
def configure():
    """
    Initialize all connection configurations for the application.

    This includes:
    - Database/Data warehouse connections (host, port, credentials, etc.)
    - Email and Slack alerting setup (SMTP, recipients, bot token, etc.)
    - The list of tables to monitor

    All answers are collected interactively and written with
    ``save_conn_params``. Run this command at startup before any data
    operations.

    Raises:
        typer.Exit: With a non-zero code when the connection type is unsupported.
    """
    # Normalise so "Postgres " and "postgres" are treated the same.
    conn_type = (
        typer.prompt("Connection type (snowflake, bigquery, mysql, postgres)").strip().lower()
    )
    params = {"type": conn_type}

    if conn_type == "bigquery":
        params["project"] = typer.prompt("BigQuery project")
        params["credentials_path"] = typer.prompt("BigQuery credentials JSON path")
        datasets_input = typer.prompt(
            "Comma-separated list of BigQuery datasets to monitor", default=""
        )
        params["datasets"] = [d.strip() for d in datasets_input.split(",") if d.strip()]
    elif conn_type == "snowflake":
        params["user"] = typer.prompt("Snowflake username")
        params["password"] = typer.prompt("Snowflake password", hide_input=True)
        params["account"] = typer.prompt("Snowflake account")
        params["database"] = typer.prompt("Snowflake database")
        params["warehouse"] = typer.prompt("Snowflake warehouse")
        schemas_input = typer.prompt(
            "Comma-separated list of Snowflake schemas to monitor", default=""
        )
        params["schemas"] = [s.strip() for s in schemas_input.split(",") if s.strip()]
        # The first listed schema doubles as the connection's default schema.
        params["schema"] = params["schemas"][0] if params["schemas"] else ""
        params["role"] = typer.prompt("Snowflake role", default="")
    elif conn_type in ("postgres", "mysql"):
        label = conn_type.capitalize()
        params["host"] = typer.prompt(f"{label} host")
        params["port"] = typer.prompt(f"{label} port", type=int)
        params["user"] = typer.prompt(f"{label} username")
        params["password"] = typer.prompt(f"{label} password", hide_input=True)
        db = typer.prompt(f"{label} database")
        params["database"] = db
        # For these engines the database name is also stored as the schema.
        params["schema"] = db
    else:
        typer.secho("Unsupported connection type.", fg=typer.colors.RED)
        # Non-zero status so scripts can tell the configuration failed.
        raise typer.Exit(code=1)

    alert_method = typer.prompt("Alert method (email, slack, both)", default="both").strip().lower()
    if alert_method not in ("email", "slack", "both"):
        # Previously this silently disabled every alert channel; keep that
        # outcome but tell the user about it.
        typer.secho(
            f"Unrecognised alert method '{alert_method}'; alerts will be disabled.",
            fg=typer.colors.YELLOW,
        )
    email_enabled = alert_method in ("email", "both")
    slack_enabled = alert_method in ("slack", "both")

    if email_enabled:
        email_config = {
            "enabled": True,
            "smtp_server": typer.prompt("SMTP server for email alerts", default="smtp.gmail.com"),
            "sender": typer.prompt("Sender email address"),
            "recipient": typer.prompt("Recipient email address"),
            "password": typer.prompt("Sender email password", hide_input=True),
            "smtp_port": typer.prompt("Enter SMTP server port", default=465),
        }
    else:
        email_config = {
            "enabled": False,
            "smtp_server": "",
            "sender": "",
            "recipient": "",
            "password": "",
            "smtp_port": "",
        }

    if slack_enabled:
        slack_config = {
            "enabled": True,
            "token": typer.prompt("Slack bot token", hide_input=True),
            "channel": typer.prompt("Slack channel"),
        }
    else:
        slack_config = {"enabled": False, "token": "", "channel": ""}

    tables_input = typer.prompt("Comma-separated list of tables to monitor for alerts", default="")
    tables = [t.strip() for t in tables_input.split(",") if t.strip()]

    params["email"] = email_config
    params["slack"] = slack_config
    params["tables"] = tables

    save_conn_params(params)
    typer.secho("Connection established.", fg=typer.colors.BRIGHT_GREEN)
169
+
170
+
171
@app.command()
def monitoring():
    """
    Append Baseline stats profiling to jsonl file

    Loads the saved configuration, reuses the module-level timed connection
    while it is still valid (otherwise opens a new one), and runs
    ``save_profile`` for the configured tables, schemas, or datasets.

    Raises:
        typer.Exit: With a non-zero code when no configuration has been saved.
    """
    from driftmon.detect.monitoring import save_profile

    params = load_conn_params()
    if not params:
        typer.secho("Run 'configure' first", fg=typer.colors.RED)
        # Non-zero status so scripts can tell nothing was profiled.
        raise typer.Exit(code=1)

    conn_type = params["type"]
    uses_single_schema = conn_type in ("mysql", "postgres")

    tables = [t for t in params.get("tables", []) if t]
    datasets = [d for d in params.get("datasets", []) if d]
    schema = params.get("schema", []) if uses_single_schema else None
    schemas = None if uses_single_schema else [s for s in params.get("schemas", []) if s]

    timeout = 600

    global timed_conn
    if timed_conn is None or not timed_conn.is_valid():
        # Build a connector only when one is actually needed; creating it up
        # front wasted an object every time the cached connection was reused.
        connector = create_connector_and_params(params)
        # Entered by hand (no ``with``) so the connection outlives this call
        # and can be reused until the timed wrapper expires.
        connector.__enter__()
        timed_conn = TimedConnection(connector, timeout=timeout)
        typer.secho(
            f"New connection established. Session valid for {timeout} seconds.",
            fg=typer.colors.BRIGHT_GREEN,
        )
    else:
        typer.secho("Using existing valid connection.", fg=typer.colors.BRIGHT_MAGENTA)

    active_connector = timed_conn.get_conn()

    with Progress(
        SpinnerColumn(), TextColumn("[progress.description]{task.description}")
    ) as progress:
        monitor_task = progress.add_task(description="Running monitoring...", total=None)

        result = save_profile(
            conn_type=conn_type,
            connector=active_connector,
            table_names=tables if tables else None,
            schema=schema if uses_single_schema else None,
            schemas=schemas if conn_type == "snowflake" else None,
            datasets=datasets if conn_type == "bigquery" else None,
        )
        print(result)
        typer.secho(
            "Baseline stats computed and hashes appended to monitoring_history.jsonl!",
            fg=typer.colors.BRIGHT_GREEN,
        )
        progress.stop_task(monitor_task)
227
+
228
+
229
@app.command()
def detect_drift():
    """
    Detect data drift if any and send alerts (Email or Slack)

    Rebuilds the alert clients from the saved configuration and triggers every
    enabled channel. The clients report their own delivery outcome; this
    command only reports which channels were used.
    """
    params = load_conn_params()
    tables = params.get("tables", []) if params else []
    with Progress(
        SpinnerColumn(), TextColumn("[progress.description]{task.description}")
    ) as progress:
        detect_alert_task = progress.add_task(
            description="Running drift detection and alerting...", total=None
        )
        setup_alert_clients(tables=tables)

        channels = []
        if email_client:
            email_client.send_email()
            channels.append("Email")
        if slack_client:
            slack_client.send_notification()
            channels.append("Slack")

        # The work is done, so stop the task (the original called start_task
        # here, which never ended it).
        progress.stop_task(detect_alert_task)

        if channels:
            typer.secho(
                f"Drift detection completed; alert channels used: {', '.join(channels)}",
                fg=typer.colors.BRIGHT_GREEN,
            )
        else:
            # Do not claim an email went out when no channel is enabled.
            typer.secho(
                "Drift detection completed; no alert channel is enabled.",
                fg=typer.colors.YELLOW,
            )
247
+
248
+
249
@app.command()
def dashboard():
    """Launch the Streamlit dashboard in the background."""
    # Run Streamlit with the current interpreter; Popen returns immediately,
    # so the dashboard keeps running without blocking the CLI.
    # NOTE(review): the script path is relative to the current working
    # directory -- confirm it resolves when the package is installed.
    streamlit_cmd = [sys.executable, "-m", "streamlit", "run", "src/dashboard/main.py"]
    subprocess.Popen(streamlit_cmd)
    typer.secho("Dashboard started in the background.", fg=typer.colors.BRIGHT_BLUE)
254
+
255
+
256
def main_shell():
    """
    Main Function for CLI runs.

    Prints a banner with the available commands, then reads lines from an
    interactive prompt and dispatches each one to the Typer app until the
    user types 'exit'/'quit' or sends end-of-file.
    """
    import shlex

    session = PromptSession()

    banner = (
        (
            "Welcome to Data Drift Detector shell! Type 'exit' or 'quit' to exit cli",
            typer.colors.GREEN,
        ),
        ("Available commands:", typer.colors.BRIGHT_CYAN),
        (
            " configure - Set up a data source connection and alert configs",
            typer.colors.YELLOW,
        ),
        (" monitoring - Run monitoring on your data", typer.colors.YELLOW),
        (
            " dashboard - Launch the Streamlit dashboard in the background",
            typer.colors.YELLOW,
        ),
        (" detect-drift - Detect data drift and send alerts", typer.colors.YELLOW),
        (" help - Show Typer help", typer.colors.YELLOW),
        (" exit/quit - Quit the shell\n", typer.colors.YELLOW),
    )
    for line, colour in banner:
        typer.secho(line, fg=colour)

    while True:
        try:
            command = session.prompt("ddd> ").strip()
            if command in {"exit", "quit"}:
                typer.secho("Goodbye!", fg=typer.colors.BRIGHT_YELLOW)
                break
            if command == "help":
                app(args=["--help"], standalone_mode=False)
            elif command:
                # standalone_mode=False keeps Typer from exiting the process
                # after each command.
                app(args=shlex.split(command), standalone_mode=False)
        except KeyboardInterrupt:
            # Ctrl-C discards the current line and shows a fresh prompt.
            continue
        except EOFError:
            # Ctrl-D leaves the shell.
            break
        except Exception as e:
            typer.secho(f"Error: {e}", fg=typer.colors.RED)
295
+
296
+
297
if __name__ == "__main__":  # Entry point
    import sys

    # With arguments, run as a one-shot Typer CLI; with none, start the
    # interactive shell.
    entry_point = app if len(sys.argv) > 1 else main_shell
    entry_point()
driftmon/config.py ADDED
@@ -0,0 +1,16 @@
1
+ import yaml
2
+
3
+
4
def save_conn_params(params, filename="params.yaml"):
    """
    Write the connection parameters to a YAML file.

    The parameters include passwords and API tokens in plain text, so the
    file is created readable and writable by its owner only.

    Args:
        params: Mapping of settings to persist.
        filename: Destination path; an existing file is overwritten.
    """
    import os

    # Create with 0o600 instead of the default mode, since the content holds
    # credentials.
    fd = os.open(filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        try:
            # The creation mode is ignored for a pre-existing file, so tighten
            # it explicitly before any secret is written.
            os.chmod(filename, 0o600)
        except OSError:
            # Best effort: some filesystems do not support POSIX permissions.
            pass
        yaml.safe_dump(params, f)
8
+
9
+
10
def load_conn_params(filename="params.yaml"):
    """
    Read YAML file containing params.

    Args:
        filename: Path of the YAML file to read.

    Returns:
        The parsed mapping, or an empty dict when the file does not exist or
        holds no data.

    Raises:
        ValueError: If the file parses to something other than a mapping.
    """
    try:
        with open(filename, "r") as f:
            loaded = yaml.safe_load(f)
    except FileNotFoundError:
        return {}

    if not loaded:
        return {}
    if not isinstance(loaded, dict):
        # Every caller uses .get() on the result; fail here with a clear
        # message instead of an AttributeError further away.
        raise ValueError(
            f"{filename} must contain a YAML mapping, got {type(loaded).__name__}"
        )
    return loaded
File without changes