BobrTools 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bobrtools-0.1.0/BobrTools/__init__.py +3 -0
- bobrtools-0.1.0/BobrTools/db_connectors.py +131 -0
- bobrtools-0.1.0/BobrTools/google_docs.py +93 -0
- bobrtools-0.1.0/BobrTools/helpers.py +17 -0
- bobrtools-0.1.0/BobrTools/telegram_client.py +28 -0
- bobrtools-0.1.0/BobrTools.egg-info/PKG-INFO +19 -0
- bobrtools-0.1.0/BobrTools.egg-info/SOURCES.txt +12 -0
- bobrtools-0.1.0/BobrTools.egg-info/dependency_links.txt +1 -0
- bobrtools-0.1.0/BobrTools.egg-info/requires.txt +7 -0
- bobrtools-0.1.0/BobrTools.egg-info/top_level.txt +1 -0
- bobrtools-0.1.0/PKG-INFO +19 -0
- bobrtools-0.1.0/README.md +0 -0
- bobrtools-0.1.0/setup.cfg +4 -0
- bobrtools-0.1.0/setup.py +24 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import socks
|
|
2
|
+
import socket
|
|
3
|
+
import logging
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import snowflake.connector
|
|
6
|
+
from snowflake.connector.pandas_tools import write_pandas
|
|
7
|
+
from .helpers import get_env_variable
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SnowflakeConnector:
    """
    Small convenience wrapper around ``snowflake.connector``.

    Connection settings come from the constructor arguments; any argument that
    is missing or empty is read from an environment variable through
    ``get_env_variable``.

    When ``use_proxy`` is true, ``upload_dataframe`` and ``get_dataframe``
    route traffic through a SOCKS5 proxy by temporarily replacing
    ``socket.socket`` with ``socks.socksocket``. The replacement is made on the
    ``socket`` module itself, so it is visible to all code in the process
    while the call is running.
    """

    def __init__(
        self, user=None, password=None, account=None, default_warehouse=None, default_database=None, default_schema=None,
        default_role=None, use_proxy=True, proxy_host=None, proxy_port=None, proxy_username=None, proxy_password=None
    ):
        """
        Stores credentials, default session objects and proxy settings.

        :param user: Snowflake user (env fallback: SNOWFLAKE_USER).
        :param password: Snowflake password (env fallback: SNOWFLAKE_PASSWORD).
        :param account: Snowflake account (env fallback: SNOWFLAKE_ACCOUNT).
        :param default_warehouse: Default warehouse (env fallback: SNOWFLAKE_WAREHOUSE).
        :param default_database: Default database (env fallback: SNOWFLAKE_DATABASE).
        :param default_schema: Default schema (env fallback: SNOWFLAKE_SCHEMA).
        :param default_role: Default role (env fallback: SNOWFLAKE_ROLE).
        :param use_proxy: When true, proxy settings are resolved and used by
            ``upload_dataframe`` / ``get_dataframe``.
        :param proxy_host: Proxy host (env fallback: PROXY_HOST).
        :param proxy_port: Proxy port (env fallback: PROXY_PORT, converted to int).
        :param proxy_username: Proxy user name (env fallback: PROXY_USERNAME).
        :param proxy_password: Proxy password (env fallback: PROXY_PASSWORD).
        """
        self.user = user or get_env_variable("SNOWFLAKE_USER")
        self.password = password or get_env_variable("SNOWFLAKE_PASSWORD")
        self.account = account or get_env_variable("SNOWFLAKE_ACCOUNT")
        self.default_role = default_role or get_env_variable("SNOWFLAKE_ROLE")
        self.default_warehouse = default_warehouse or get_env_variable("SNOWFLAKE_WAREHOUSE")
        self.default_database = default_database or get_env_variable("SNOWFLAKE_DATABASE")
        self.default_schema = default_schema or get_env_variable("SNOWFLAKE_SCHEMA")
        self.use_proxy = use_proxy

        if self.use_proxy:
            # Remember the socket class in effect now so reset_proxy() can restore it.
            self.default_socket = socket.socket
            self.proxy_host = proxy_host or get_env_variable("PROXY_HOST")
            self.proxy_port = proxy_port or int(get_env_variable("PROXY_PORT"))
            self.proxy_username = proxy_username or get_env_variable("PROXY_USERNAME")
            self.proxy_password = proxy_password or get_env_variable("PROXY_PASSWORD")

    def init_proxy(self):
        """
        Configures a SOCKS5 proxy for Snowflake connections.

        Sets the PySocks default proxy from the stored proxy settings and
        replaces ``socket.socket`` with ``socks.socksocket``.
        """
        socks.set_default_proxy(
            socks.SOCKS5,
            addr=self.proxy_host,
            port=self.proxy_port,
            username=self.proxy_username,
            password=self.proxy_password,
        )
        socket.socket = socks.socksocket

    def reset_proxy(self):
        """
        Resets the proxy settings to default.

        Restores the ``socket.socket`` class captured in ``__init__``. Does
        nothing when no socket class was captured (the instance was created
        with ``use_proxy=False``), instead of failing with AttributeError.
        """
        saved_socket = getattr(self, "default_socket", None)
        if saved_socket is not None:
            socket.socket = saved_socket

    def connect(self, role=None, warehouse=None, database=None, schema=None):
        """
        Establishes and returns a connection to Snowflake, with optional overrides
        for role, warehouse, database, and schema.

        :param role: Name of the Snowflake role to override.
        :param warehouse: Name of the Snowflake warehouse to override.
        :param database: Name of the Snowflake database to override.
        :param schema: Name of the Snowflake schema to override.
        :return: Snowflake connection object.
        """
        return snowflake.connector.connect(
            user=self.user,
            password=self.password,
            account=self.account,
            role=role or self.default_role,
            warehouse=warehouse or self.default_warehouse,
            database=database or self.default_database,
            schema=schema or self.default_schema
        )

    def upload_dataframe(
        self, dataframe, table_name, overwrite=True, auto_create_table=True, warehouse=None, database=None, schema=None
    ):
        """
        Uploads a Pandas DataFrame to a Snowflake table, with optional overrides
        for warehouse, database, and schema.
        Temporarily enables proxy if configured.

        :param dataframe: pandas DataFrame to upload.
        :param table_name: Name of the target table in Snowflake.
        :param overwrite: Passed through to ``write_pandas``. When true, and if
            auto_create_table is true, then it drops the table. Otherwise, it
            truncates the table (per the ``write_pandas`` documentation).
        :param auto_create_table: When true, will automatically create a table with corresponding columns for each
            column in the passed in DataFrame. The table will not be created if it already exists.
        :param warehouse: Name of the Snowflake warehouse to override.
        :param database: Name of the Snowflake database to override.
        :param schema: Name of the Snowflake schema to override.
        :return: None.
        :raises Exception: If the upload fails.
        """
        try:
            if self.use_proxy:
                self.init_proxy()

            conn = self.connect(warehouse=warehouse, database=database, schema=schema)
            try:
                success, _nchunks, nrows, _ = write_pandas(
                    conn,
                    dataframe,
                    table_name,
                    overwrite=overwrite,
                    auto_create_table=auto_create_table
                )
                if success:
                    logger.info(
                        f"DataFrame successfully uploaded to table '{table_name}' in database '{database or self.default_database}' "
                        f"and schema '{schema or self.default_schema}'. Rows inserted: {nrows}")
                else:
                    raise Exception("Failed to upload DataFrame to Snowflake.")
            finally:
                conn.close()
        finally:
            # Always restore the original socket class, even when the upload failed.
            if self.use_proxy:
                self.reset_proxy()

    def get_dataframe(self, query, warehouse=None, database=None, schema=None):
        """
        Runs a query and returns the full result set as a pandas DataFrame.
        Temporarily enables proxy if configured.

        The cursor and the connection are closed even when executing the query
        or fetching the rows raises.

        :param query: SQL text passed to ``cursor.execute``.
        :param warehouse: Name of the Snowflake warehouse to override.
        :param database: Name of the Snowflake database to override.
        :param schema: Name of the Snowflake schema to override.
        :return: DataFrame whose columns are named after ``cursor.description``.
        """
        try:
            if self.use_proxy:
                self.init_proxy()

            conn = self.connect(warehouse=warehouse, database=database, schema=schema)
            try:
                cursor = conn.cursor()
                try:
                    cursor.execute(query)
                    rows = cursor.fetchall()
                    column_names = [column[0] for column in cursor.description]
                    return pd.DataFrame(rows, columns=column_names)
                finally:
                    cursor.close()
            finally:
                conn.close()
        finally:
            # Always restore the original socket class, even when the query failed.
            if self.use_proxy:
                self.reset_proxy()
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import gspread
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from gspread import utils
|
|
5
|
+
from oauth2client.service_account import ServiceAccountCredentials
|
|
6
|
+
from tenacity import (
|
|
7
|
+
retry,
|
|
8
|
+
wait_incrementing,
|
|
9
|
+
stop_after_attempt,
|
|
10
|
+
)
|
|
11
|
+
from gspread_formatting import (
|
|
12
|
+
set_frozen,
|
|
13
|
+
format_cell_range,
|
|
14
|
+
format_cell_ranges,
|
|
15
|
+
CellFormat,
|
|
16
|
+
Color,
|
|
17
|
+
TextFormat,
|
|
18
|
+
NumberFormat,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class GoogleDocs:
    """
    Helper around gspread for moving data between Google Sheets and pandas.

    ``self.gc`` holds the authorized gspread client; ``self.spreadsheet``
    holds the spreadsheet most recently opened or created by
    ``write_dataframe`` (``None`` until then).
    """

    def __init__(self, keyfile=None):
        """
        Initializes the GoogleDocs class with credentials for Google API.

        :param keyfile: Path to the JSON file containing Google service account credentials.
            Defaults to "./credentials.json" if not provided.
        """
        if keyfile is None:
            keyfile = "./credentials.json"

        scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
        credentials = ServiceAccountCredentials.from_json_keyfile_name(keyfile, scope)
        self.spreadsheet = None
        self.gc = gspread.authorize(credentials)

    @staticmethod
    @retry(wait=wait_incrementing(start=60, increment=30, max=300), stop=stop_after_attempt(5))
    def safe_execute(func, *args, **kwargs):
        """
        Calls ``func(*args, **kwargs)`` after a 2 second pause and returns its result.

        The call is wrapped in a tenacity ``retry`` with an incrementing wait
        (start=60, increment=30, max=300) that stops after 5 attempts.
        """
        time.sleep(2)
        return func(*args, **kwargs)

    def format_worksheet(self, worksheet, headers):
        """
        Applies the default header style to the first row and freezes it.

        :param worksheet: gspread worksheet to format.
        :param headers: Column headers; only their count is used, to size the
            formatted range (row 1, columns 1..len(headers)).
        """
        header_range = utils.rowcol_to_a1(1, 1) + ":" + utils.rowcol_to_a1(1, len(headers))
        header_format = CellFormat(
            backgroundColor=Color(201/255, 218/255, 248/255),
            textFormat=TextFormat(bold=True),
            horizontalAlignment="CENTER",
            wrapStrategy="WRAP"
        )
        self.safe_execute(format_cell_range, worksheet, header_range, header_format)
        worksheet.freeze(rows=1)

    def get_dataframe(self, spreadsheet_key, worksheet_index=0, worksheet_title=None) -> pd.DataFrame:
        """
        Returns a worksheet from a Google Spreadsheet as a pandas DataFrame.

        The first row of the worksheet is used as the column names. A
        worksheet that yields no rows at all produces an empty DataFrame.

        :param spreadsheet_key: ID of the Google Spreadsheet.
        :param worksheet_index: Index of the worksheet (default: 0).
        :param worksheet_title: Title of the worksheet (overrides index if set).
        :return: pandas DataFrame with the worksheet data.
        """
        spreadsheet = self.gc.open_by_key(spreadsheet_key)
        if worksheet_title:
            worksheet = spreadsheet.worksheet(worksheet_title)
        else:
            worksheet = spreadsheet.get_worksheet(worksheet_index)
        data = worksheet.get_all_values()
        if not data:
            # No header row to index; avoid IndexError on data[0].
            return pd.DataFrame()
        header, *rows = data
        return pd.DataFrame(rows, columns=header)

    def write_dataframe(
        self, dataframe, worksheet_title, default_dataframe_formatting: bool = False, spreadsheet_key=None
    ):
        """
        Writes a DataFrame (header row plus values) into a worksheet,
        replacing whatever the worksheet contained.

        :param dataframe: pandas DataFrame to write.
        :param worksheet_title: Title of the target worksheet; it is added to
            the spreadsheet if it does not exist yet.
        :param default_dataframe_formatting: When true, the header row is
            styled and frozen via ``format_worksheet``.
        :param spreadsheet_key: ID of an existing spreadsheet. When omitted, a
            new spreadsheet titled "new" is created.
        :return: None. The spreadsheet used is kept in ``self.spreadsheet``.
        """
        if spreadsheet_key:
            self.spreadsheet = self.gc.open_by_key(spreadsheet_key)
        else:
            # Omit folder_id instead of sending an empty string as the parent
            # folder; per the gspread docs folder_id is optional.
            self.spreadsheet = self.gc.create(title="new")

        try:
            worksheet = self.spreadsheet.worksheet(worksheet_title)
        except gspread.WorksheetNotFound:
            worksheet = self.spreadsheet.add_worksheet(title=worksheet_title, rows=100, cols=20)

        columns = dataframe.columns.values.tolist()
        data = [columns] + dataframe.values.tolist()

        worksheet.clear()
        worksheet.update(data)

        if default_dataframe_formatting:
            self.format_worksheet(worksheet=worksheet, headers=columns)
|
|
92
|
+
|
|
93
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_env_variable(var_name, required=True):
    """
    Look up an environment variable, optionally insisting that it has a value.

    An empty string counts as "not set" for the purpose of the ``required``
    check.

    :param var_name: Name of the environment variable.
    :param required: If True, a missing or empty variable is an error.
    :return: The variable's value; when ``required`` is False this may be
        None (variable absent) or an empty string.
    :raises EnvironmentError: If the variable is required but missing or empty.
    """
    resolved = os.environ.get(var_name)

    # Anything truthy is always acceptable; a falsy value only when optional.
    if resolved or not required:
        return resolved

    raise EnvironmentError(f"Environment variable '{var_name}' is not set.")
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import telegram
|
|
3
|
+
from .helpers import get_env_variable
|
|
4
|
+
from telegram.error import RetryAfter, TimedOut
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TelegramClient:
    """Minimal async wrapper around ``telegram.Bot`` that retries sends."""

    def __init__(self, token=None):
        """
        Creates the underlying bot.

        :param token: Bot token; when missing or empty it is read from the
            TELEGRAM_BOT_TOKEN environment variable via ``get_env_variable``.
        """
        self.token = token or get_env_variable("TELEGRAM_BOT_TOKEN")
        self.telegram = telegram.Bot(token=self.token)

    async def send_message(self, chat_id, text, parse_mode=None, max_retries=5, timeout_delay=5):
        """
        Sends a message, retrying on ``RetryAfter`` and ``TimedOut``.

        Waiting between attempts uses ``asyncio.sleep`` so the event loop is
        not blocked while this coroutine is paused.

        :param chat_id: Target chat, forwarded to the bot's ``send_message``.
        :param text: Message text, forwarded to the bot's ``send_message``.
        :param parse_mode: Parse mode, forwarded to the bot's ``send_message``.
        :param max_retries: Maximum number of send attempts.
        :param timeout_delay: Seconds to wait after a ``TimedOut`` error.
        :return: Whatever the bot's ``send_message`` returns, or None when
            every attempt failed with ``RetryAfter`` / ``TimedOut``.
        """
        # Local import: keeps this block self-contained without touching the
        # module's import section.
        import asyncio

        retries = 0

        while retries < max_retries:
            try:
                return await self.telegram.send_message(
                    chat_id=chat_id,
                    text=text,
                    parse_mode=parse_mode
                )
            except RetryAfter as e:
                # Wait one second longer than the server asked for.
                pause = int(e.retry_after) + 1
            except TimedOut:
                pause = timeout_delay

            await asyncio.sleep(pause)
            retries += 1

        # All attempts were used up; keep the historical "return None" contract.
        return None
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: BobrTools
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Tools designed to simplify routine tasks for analysts, enabling faster and more efficient data processing and analysis
|
|
5
|
+
Author: Artsem Bobr
|
|
6
|
+
Author-email: artyombobr@gmail.com
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.9
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: gspread>=6.1.4
|
|
14
|
+
Requires-Dist: oauth2client>=4.1.3
|
|
15
|
+
Requires-Dist: pandas>=2.2.3
|
|
16
|
+
Requires-Dist: setuptools>=68.2.2
|
|
17
|
+
Requires-Dist: snowflake_connector_python>=3.10.1
|
|
18
|
+
Requires-Dist: snowflake_snowpark_python>=1.17.0
|
|
19
|
+
Requires-Dist: pysocks>=1.7.1
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
BobrTools/__init__.py
|
|
4
|
+
BobrTools/db_connectors.py
|
|
5
|
+
BobrTools/google_docs.py
|
|
6
|
+
BobrTools/helpers.py
|
|
7
|
+
BobrTools/telegram_client.py
|
|
8
|
+
BobrTools.egg-info/PKG-INFO
|
|
9
|
+
BobrTools.egg-info/SOURCES.txt
|
|
10
|
+
BobrTools.egg-info/dependency_links.txt
|
|
11
|
+
BobrTools.egg-info/requires.txt
|
|
12
|
+
BobrTools.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
BobrTools
|
bobrtools-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: BobrTools
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Tools designed to simplify routine tasks for analysts, enabling faster and more efficient data processing and analysis
|
|
5
|
+
Author: Artsem Bobr
|
|
6
|
+
Author-email: artyombobr@gmail.com
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.9
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: gspread>=6.1.4
|
|
14
|
+
Requires-Dist: oauth2client>=4.1.3
|
|
15
|
+
Requires-Dist: pandas>=2.2.3
|
|
16
|
+
Requires-Dist: setuptools>=68.2.2
|
|
17
|
+
Requires-Dist: snowflake_connector_python>=3.10.1
|
|
18
|
+
Requires-Dist: snowflake_snowpark_python>=1.17.0
|
|
19
|
+
Requires-Dist: pysocks>=1.7.1
|
|
File without changes
|
bobrtools-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from setuptools import setup, find_packages

# Requirements used when requirements.txt is not shipped next to setup.py
# (the sdist's SOURCES.txt does not list it). Mirrors the Requires-Dist
# entries of the published 0.1.0 metadata.
# NOTE(review): the package also imports tenacity, gspread_formatting and
# telegram, none of which appear in these requirements - confirm and add them
# to requirements.txt.
_FALLBACK_REQUIREMENTS = [
    "gspread>=6.1.4",
    "oauth2client>=4.1.3",
    "pandas>=2.2.3",
    "setuptools>=68.2.2",
    "snowflake_connector_python>=3.10.1",
    "snowflake_snowpark_python>=1.17.0",
    "pysocks>=1.7.1",
]


def _read_requirements(path="requirements.txt"):
    """Return the requirement lines from *path*, or the fallback list if the file is missing."""
    try:
        with open(path, encoding="utf-8") as f:
            raw_lines = f.read().splitlines()
    except FileNotFoundError:
        return list(_FALLBACK_REQUIREMENTS)
    # Drop blank lines and comments so they are not passed on as requirements.
    stripped = (line.strip() for line in raw_lines)
    return [line for line in stripped if line and not line.startswith("#")]


def _read_text(path):
    """Return the UTF-8 text of *path*, closing the file afterwards."""
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


install_requires = _read_requirements()

setup(
    name="BobrTools",
    version="0.1.0",
    description="Tools designed to simplify routine tasks for analysts, enabling faster "
                "and more efficient data processing and analysis",
    long_description=_read_text("README.md"),
    long_description_content_type="text/markdown",
    author="Artsem Bobr",
    author_email="artyombobr@gmail.com",
    license="MIT",
    packages=find_packages(),
    install_requires=install_requires,
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.9",
)
|