BobrTools 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ from .google_docs import GoogleDocs
2
+ from .db_connectors import SnowflakeConnector
3
+ from .telegram_client import TelegramClient
@@ -0,0 +1,131 @@
1
+ import socks
2
+ import socket
3
+ import logging
4
+ import pandas as pd
5
+ import snowflake.connector
6
+ from snowflake.connector.pandas_tools import write_pandas
7
+ from .helpers import get_env_variable
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class SnowflakeConnector:
    """
    Convenience wrapper around ``snowflake.connector`` for moving pandas
    DataFrames in and out of Snowflake, optionally through a SOCKS5 proxy.

    NOTE: the proxy is enabled by replacing ``socket.socket`` for the whole
    process while a query/upload is running, so other sockets opened during
    that window are routed through the proxy as well.
    """

    def __init__(
        self, user=None, password=None, account=None, default_warehouse=None, default_database=None, default_schema=None,
        default_role=None, use_proxy=True, proxy_host=None, proxy_port=None, proxy_username=None, proxy_password=None
    ):
        """
        Stores credentials and defaults; every argument left empty is read from
        the matching environment variable (``SNOWFLAKE_*`` / ``PROXY_*``).

        :param user: Snowflake user name.
        :param password: Snowflake password.
        :param account: Snowflake account identifier.
        :param default_warehouse: Warehouse used when a call does not override it.
        :param default_database: Database used when a call does not override it.
        :param default_schema: Schema used when a call does not override it.
        :param default_role: Role used when a call does not override it.
        :param use_proxy: When true, connections are made through a SOCKS5 proxy.
        :param proxy_host: Proxy host (only read when use_proxy is true).
        :param proxy_port: Proxy port (only read when use_proxy is true).
        :param proxy_username: Proxy user name (only read when use_proxy is true).
        :param proxy_password: Proxy password (only read when use_proxy is true).
        :raises EnvironmentError: If a value is neither passed nor set in the environment.
        """
        self.user = user or get_env_variable("SNOWFLAKE_USER")
        self.password = password or get_env_variable("SNOWFLAKE_PASSWORD")
        self.account = account or get_env_variable("SNOWFLAKE_ACCOUNT")
        self.default_role = default_role or get_env_variable("SNOWFLAKE_ROLE")
        self.default_warehouse = default_warehouse or get_env_variable("SNOWFLAKE_WAREHOUSE")
        self.default_database = default_database or get_env_variable("SNOWFLAKE_DATABASE")
        self.default_schema = default_schema or get_env_variable("SNOWFLAKE_SCHEMA")
        self.use_proxy = use_proxy

        # Remember the socket class that was active at construction time so
        # reset_proxy() can always restore it, even when use_proxy is False.
        self.default_socket = socket.socket

        if self.use_proxy:
            self.proxy_host = proxy_host or get_env_variable("PROXY_HOST")
            self.proxy_port = proxy_port or int(get_env_variable("PROXY_PORT"))
            self.proxy_username = proxy_username or get_env_variable("PROXY_USERNAME")
            self.proxy_password = proxy_password or get_env_variable("PROXY_PASSWORD")

    def init_proxy(self):
        """
        Configures a SOCKS5 proxy for Snowflake connections.

        Replaces ``socket.socket`` with ``socks.socksocket`` process-wide;
        call reset_proxy() to undo it.
        """
        socks.set_default_proxy(
            socks.SOCKS5,
            addr=self.proxy_host,
            port=self.proxy_port,
            username=self.proxy_username,
            password=self.proxy_password,
        )
        socket.socket = socks.socksocket

    def reset_proxy(self):
        """
        Resets the proxy settings to default by restoring the socket class
        captured in the constructor.
        """
        socket.socket = self.default_socket

    def connect(self, role=None, warehouse=None, database=None, schema=None):
        """
        Establishes and returns a connection to Snowflake, with optional overrides
        for role, warehouse, database, and schema.
        :param role: Name of the Snowflake role to override.
        :param warehouse: Name of the Snowflake warehouse to override.
        :param database: Name of the Snowflake database to override.
        :param schema: Name of the Snowflake schema to override.
        :return: Snowflake connection object.
        """
        return snowflake.connector.connect(
            user=self.user,
            password=self.password,
            account=self.account,
            role=role or self.default_role,
            warehouse=warehouse or self.default_warehouse,
            database=database or self.default_database,
            schema=schema or self.default_schema
        )

    def upload_dataframe(
        self, dataframe, table_name, overwrite=True, auto_create_table=True, warehouse=None, database=None, schema=None
    ):
        """
        Uploads a Pandas DataFrame to a Snowflake table, with optional overrides
        for warehouse, database, and schema.
        Temporarily enables proxy if configured.
        :param dataframe: pandas DataFrame to upload.
        :param table_name: Name of the target table in Snowflake.
        :param overwrite: When true, and if auto_create_table is true, then it drops the table. Otherwise, it
            truncates the table. Passed through unchanged to ``write_pandas``.
        :param auto_create_table: When true, will automatically create a table with corresponding columns for each
            column in the passed in DataFrame. The table will not be created if it already exists.
        :param warehouse: Name of the Snowflake warehouse to override.
        :param database: Name of the Snowflake database to override.
        :param schema: Name of the Snowflake schema to override.
        :return: None.
        :raises Exception: If the upload fails.
        """
        try:
            if self.use_proxy:
                self.init_proxy()

            conn = self.connect(warehouse=warehouse, database=database, schema=schema)
            try:
                success, nchunks, nrows, _ = write_pandas(
                    conn,
                    dataframe,
                    table_name,
                    overwrite=overwrite,
                    auto_create_table=auto_create_table
                )
                if not success:
                    raise Exception("Failed to upload DataFrame to Snowflake.")
                logger.info(
                    "DataFrame successfully uploaded to table '%s' in database '%s' "
                    "and schema '%s'. Rows inserted: %s",
                    table_name,
                    database or self.default_database,
                    schema or self.default_schema,
                    nrows,
                )
            finally:
                conn.close()
        finally:
            # Always restore the original socket class, even on failure.
            if self.use_proxy:
                self.reset_proxy()

    def get_dataframe(self, query, warehouse=None, database=None, schema=None):
        """
        Runs a query and returns the full result set as a pandas DataFrame.
        Temporarily enables proxy if configured.
        :param query: SQL text to execute.
        :param warehouse: Name of the Snowflake warehouse to override.
        :param database: Name of the Snowflake database to override.
        :param schema: Name of the Snowflake schema to override.
        :return: pandas DataFrame whose columns are named after the cursor description.
        """
        try:
            if self.use_proxy:
                self.init_proxy()

            conn = self.connect(warehouse=warehouse, database=database, schema=schema)
            try:
                cursor = conn.cursor()
                try:
                    cursor.execute(query)
                    return pd.DataFrame(
                        cursor.fetchall(),
                        columns=[column[0] for column in cursor.description],
                    )
                finally:
                    # Close the cursor even when execute/fetch raises.
                    cursor.close()
            finally:
                # Close the connection even when the query fails.
                conn.close()
        finally:
            if self.use_proxy:
                self.reset_proxy()
@@ -0,0 +1,93 @@
1
+ import time
2
+ import gspread
3
+ import pandas as pd
4
+ from gspread import utils
5
+ from oauth2client.service_account import ServiceAccountCredentials
6
+ from tenacity import (
7
+ retry,
8
+ wait_incrementing,
9
+ stop_after_attempt,
10
+ )
11
+ from gspread_formatting import (
12
+ set_frozen,
13
+ format_cell_range,
14
+ format_cell_ranges,
15
+ CellFormat,
16
+ Color,
17
+ TextFormat,
18
+ NumberFormat,
19
+ )
20
+
21
+
22
class GoogleDocs:
    """
    Small helper around gspread for reading worksheets into pandas DataFrames
    and writing DataFrames back to Google Sheets.
    """

    def __init__(self, keyfile=None):
        """
        Initializes the GoogleDocs class with credentials for Google API.
        :param keyfile: Path to the JSON file containing Google service account credentials.
                        Defaults to "./credentials.json" if not provided.
        """
        if keyfile is None:
            keyfile = "./credentials.json"

        scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
        credentials = ServiceAccountCredentials.from_json_keyfile_name(keyfile, scope)
        # Last spreadsheet opened or created by write_dataframe().
        self.spreadsheet = None
        self.gc = gspread.authorize(credentials)

    @staticmethod
    @retry(wait=wait_incrementing(start=60, increment=30, max=300), stop=stop_after_attempt(5))
    def safe_execute(func, *args, **kwargs):
        """
        Calls ``func(*args, **kwargs)`` after a 2 second pause, retrying on failure.

        The retry policy is the tenacity decorator above: up to 5 attempts with
        an incrementing wait (start=60, increment=30, max=300 seconds).
        :param func: Callable to execute.
        :return: Whatever ``func`` returns.
        """
        time.sleep(2)
        return func(*args, **kwargs)

    def format_worksheet(self, worksheet, headers):
        """
        Applies the default header style (light blue background, bold, centered,
        wrapped) to the first row and freezes it.
        :param worksheet: gspread worksheet to format.
        :param headers: Column headers; only their count is used to size the range.
        """
        header_range = utils.rowcol_to_a1(1, 1) + ":" + utils.rowcol_to_a1(1, len(headers))
        header_format = CellFormat(
            backgroundColor=Color(201/255, 218/255, 248/255),
            textFormat=TextFormat(bold=True),
            horizontalAlignment="CENTER",
            wrapStrategy="WRAP"
        )
        self.safe_execute(format_cell_range, worksheet, header_range, header_format)
        worksheet.freeze(rows=1)

    def get_dataframe(self, spreadsheet_key, worksheet_index=0, worksheet_title=None) -> pd.DataFrame:
        """
        Returns a worksheet from a Google Spreadsheet as a pandas DataFrame.
        The first row is used as the column names.
        :param spreadsheet_key: ID of the Google Spreadsheet.
        :param worksheet_index: Index of the worksheet (default: 0).
        :param worksheet_title: Title of the worksheet (overrides index if set).
        :return: pandas DataFrame with the worksheet data (empty if the worksheet has no rows).
        """
        spreadsheet = self.gc.open_by_key(spreadsheet_key)
        if worksheet_title:
            worksheet = spreadsheet.worksheet(worksheet_title)
        else:
            worksheet = spreadsheet.get_worksheet(worksheet_index)
        data = worksheet.get_all_values()
        if not data:
            # No header row to promote; avoid IndexError on data[0].
            return pd.DataFrame()
        return pd.DataFrame(data[1:], columns=data[0])

    def write_dataframe(
        self, dataframe, worksheet_title, default_dataframe_formatting: bool = False, spreadsheet_key=None
    ):
        """
        Replaces the contents of a worksheet with a DataFrame (header row first).

        The target spreadsheet is stored on ``self.spreadsheet``. The worksheet
        is created (100 rows x 20 cols) when it does not exist yet.
        :param dataframe: pandas DataFrame to write.
        :param worksheet_title: Title of the worksheet to write into.
        :param default_dataframe_formatting: When true, applies format_worksheet() to the header row.
        :param spreadsheet_key: ID of an existing spreadsheet; when omitted a new
                                spreadsheet titled "new" is created.
        :return: None.
        """
        if spreadsheet_key:
            self.spreadsheet = self.gc.open_by_key(spreadsheet_key)
        else:
            # Do not pass an empty folder id: gspread only skips the parent
            # folder when folder_id is None, so "" would be sent as a parent.
            self.spreadsheet = self.gc.create(title="new")

        try:
            worksheet = self.spreadsheet.worksheet(worksheet_title)
        except gspread.WorksheetNotFound:
            worksheet = self.spreadsheet.add_worksheet(title=worksheet_title, rows=100, cols=20)

        columns = dataframe.columns.values.tolist()
        data = [columns] + dataframe.values.tolist()

        worksheet.clear()
        worksheet.update(data)

        if default_dataframe_formatting:
            self.format_worksheet(worksheet=worksheet, headers=columns)
92
+
93
+
@@ -0,0 +1,17 @@
1
+ import os
2
+
3
+
4
def get_env_variable(var_name, required=True):
    """
    Looks up an environment variable, optionally insisting that it is set.
    :param var_name: Name of the environment variable.
    :param required: If True, a missing or empty variable is treated as an error.
    :return: The variable's value; when not required, whatever the environment
             holds (None if unset, possibly an empty string).
    :raises EnvironmentError: If the variable is required but unset or empty.
    """
    found = os.getenv(var_name)

    # Happy path: a non-empty value, or the caller tolerates its absence.
    if found or not required:
        return found

    raise EnvironmentError(f"Environment variable '{var_name}' is not set.")
@@ -0,0 +1,28 @@
1
+ import time
2
+ import telegram
3
+ from .helpers import get_env_variable
4
+ from telegram.error import RetryAfter, TimedOut
5
+
6
+
7
class TelegramClient:
    """
    Minimal async wrapper around ``telegram.Bot`` that retries message
    delivery on flood-control and timeout errors.
    """

    def __init__(self, token=None):
        """
        :param token: Bot token; read from ``TELEGRAM_BOT_TOKEN`` when omitted.
        :raises EnvironmentError: If no token is passed and the variable is not set.
        """
        self.token = token or get_env_variable("TELEGRAM_BOT_TOKEN")
        self.telegram = telegram.Bot(token=self.token)

    async def send_message(self, chat_id, text, parse_mode=None, max_retries=5, timeout_delay=5):
        """
        Sends a text message, retrying on ``RetryAfter`` and ``TimedOut``.
        :param chat_id: Target chat identifier.
        :param text: Message text.
        :param parse_mode: Optional parse mode forwarded to the bot API.
        :param max_retries: Maximum number of send attempts.
        :param timeout_delay: Seconds to wait after a ``TimedOut`` before retrying.
        :return: The result of ``Bot.send_message`` on success, or None when
                 every attempt failed with a retryable error.
        """
        # Local import: the module's import block only provides ``time``.
        import asyncio

        retries = 0

        while retries < max_retries:
            try:
                return await self.telegram.send_message(
                    chat_id=chat_id,
                    text=text,
                    parse_mode=parse_mode
                )
            except RetryAfter as e:
                delay = e.retry_after
                # NOTE(review): newer python-telegram-bot releases may expose
                # retry_after as a timedelta rather than a number - confirm.
                if hasattr(delay, "total_seconds"):
                    delay = delay.total_seconds()
                # Non-blocking wait: time.sleep() here would stall the event loop.
                await asyncio.sleep(int(delay) + 1)
            except TimedOut:
                await asyncio.sleep(timeout_delay)

            retries += 1

        # All attempts were consumed by retryable errors.
        return None
@@ -0,0 +1,19 @@
1
+ Metadata-Version: 2.1
2
+ Name: BobrTools
3
+ Version: 0.1.0
4
+ Summary: Tools designed to simplify routine tasks for analysts, enabling faster and more efficient data processing and analysis
5
+ Author: Artsem Bobr
6
+ Author-email: artyombobr@gmail.com
7
+ License: MIT
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: gspread>=6.1.4
14
+ Requires-Dist: oauth2client>=4.1.3
15
+ Requires-Dist: pandas>=2.2.3
16
+ Requires-Dist: setuptools>=68.2.2
17
+ Requires-Dist: snowflake_connector_python>=3.10.1
18
+ Requires-Dist: snowflake_snowpark_python>=1.17.0
19
+ Requires-Dist: pysocks>=1.7.1
@@ -0,0 +1,12 @@
1
+ README.md
2
+ setup.py
3
+ BobrTools/__init__.py
4
+ BobrTools/db_connectors.py
5
+ BobrTools/google_docs.py
6
+ BobrTools/helpers.py
7
+ BobrTools/telegram_client.py
8
+ BobrTools.egg-info/PKG-INFO
9
+ BobrTools.egg-info/SOURCES.txt
10
+ BobrTools.egg-info/dependency_links.txt
11
+ BobrTools.egg-info/requires.txt
12
+ BobrTools.egg-info/top_level.txt
@@ -0,0 +1,7 @@
1
+ gspread>=6.1.4
2
+ oauth2client>=4.1.3
3
+ pandas>=2.2.3
4
+ setuptools>=68.2.2
5
+ snowflake_connector_python>=3.10.1
6
+ snowflake_snowpark_python>=1.17.0
7
+ pysocks>=1.7.1
@@ -0,0 +1 @@
1
+ BobrTools
@@ -0,0 +1,19 @@
1
+ Metadata-Version: 2.1
2
+ Name: BobrTools
3
+ Version: 0.1.0
4
+ Summary: Tools designed to simplify routine tasks for analysts, enabling faster and more efficient data processing and analysis
5
+ Author: Artsem Bobr
6
+ Author-email: artyombobr@gmail.com
7
+ License: MIT
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: gspread>=6.1.4
14
+ Requires-Dist: oauth2client>=4.1.3
15
+ Requires-Dist: pandas>=2.2.3
16
+ Requires-Dist: setuptools>=68.2.2
17
+ Requires-Dist: snowflake_connector_python>=3.10.1
18
+ Requires-Dist: snowflake_snowpark_python>=1.17.0
19
+ Requires-Dist: pysocks>=1.7.1
File without changes
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,24 @@
1
from setuptools import setup, find_packages

# Used when requirements.txt is absent. The published source distribution does
# not list requirements.txt among its files, so reading it unconditionally makes
# building from the sdist fail. These entries mirror the released metadata.
# NOTE(review): the package also imports `telegram`, `tenacity` and
# `gspread_formatting`, none of which are declared here - confirm and add them.
_FALLBACK_REQUIREMENTS = [
    "gspread>=6.1.4",
    "oauth2client>=4.1.3",
    "pandas>=2.2.3",
    "setuptools>=68.2.2",
    "snowflake_connector_python>=3.10.1",
    "snowflake_snowpark_python>=1.17.0",
    "pysocks>=1.7.1",
]


def _read_requirements(path="requirements.txt"):
    """Return the requirement specifiers from *path*, or the fallback list if the file is missing."""
    try:
        with open(path, encoding="utf-8") as f:
            lines = f.read().splitlines()
    except FileNotFoundError:
        return list(_FALLBACK_REQUIREMENTS)
    # Skip blank lines and comments; they are not valid requirement specifiers.
    return [line.strip() for line in lines if line.strip() and not line.strip().startswith("#")]


def _read_long_description(path="README.md"):
    """Return the README text, closing the file handle once it has been read."""
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


setup(
    name="BobrTools",
    version="0.1.0",
    description="Tools designed to simplify routine tasks for analysts, enabling faster "
                "and more efficient data processing and analysis",
    long_description=_read_long_description(),
    long_description_content_type="text/markdown",
    author="Artsem Bobr",
    author_email="artyombobr@gmail.com",
    license="MIT",
    packages=find_packages(),
    install_requires=_read_requirements(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.9",
)