rdxz2-utill 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rdxz2-utill might be problematic.

utill/my_pg.py ADDED
@@ -0,0 +1,159 @@
+ import csv
+ import json
+ import os
+ import psycopg
+ import psycopg.rows
+
+ from loguru import logger
+ from textwrap import dedent
+
+ from .my_env import PG_FILENAME
+ from .my_string import generate_random_string
+ from .my_tunnel import establish_tunnel
+
+
+ class PG:
+     def __init__(
+         self,
+         connection: str | None = None,
+         config_source: str | dict = PG_FILENAME,
+         autocommit: bool = True,
+     ) -> None:
+         if isinstance(config_source, str):
+             if connection is None:
+                 raise ValueError('Connection name must be provided when using file source!')
+             with open(os.path.expanduser(config_source)) as f:
+                 conf = json.load(f)[connection]
+         elif isinstance(config_source, dict):
+             conf = config_source
+         else:
+             raise TypeError('config_source must be a file path or a config dict!')
+
+         (_, host, port) = establish_tunnel(conf)
+         self.db_host = host
+         self.db_port = port
+         self.db_username = conf['username']
+         self.db_password = conf['password']
+         self.db_name = conf['db']
+         self.conf = conf
+
+         self.conn = None
+         self.cursor = None
+         self.establish_connection(autocommit)
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, exc_tb):
+         self.close()
+
+     def establish_connection(self, autocommit: bool, row_factory: psycopg.rows.RowFactory = psycopg.rows.dict_row):
+         self.conn = psycopg.connect(f'postgresql://{self.db_username}:{self.db_password}@{self.db_host}:{self.db_port}/{self.db_name}', autocommit=autocommit)
+         self.cursor = self.conn.cursor(row_factory=row_factory)
+         logger.debug(f'PG client open: {self.db_username}@{self.db_host}:{self.db_port}/{self.db_name}, autocommit={self.conn.autocommit}')
+
+     def change_autocommit(self, autocommit: bool):
+         if autocommit == self.conn.autocommit:
+             return
+
+         self.conn.autocommit = autocommit
+
+     def execute_query(self, query: str, *params):
+         # Make sure the connection is still alive
+         if self.conn.closed:
+             self.establish_connection(self.conn.autocommit)
+
+         query = query.strip()
+         logger.debug(f'🔎 Query:\n{query}')
+
+         # Pass None instead of an empty tuple so queries containing a literal '%' are not misparsed
+         return self.cursor.execute(query, params or None)
+
+     def download_csv(self, query: str, file_path: str) -> None:
+         query = dedent(
+             f'''
+             COPY ({query})
+             TO STDOUT
+             WITH DELIMITER ','
+             CSV HEADER;
+             '''
+         )
+         logger.debug(f'🔎 Query:\n{query}')
+         with open(os.path.expanduser(file_path), 'wb') as f:
+             with self.cursor.copy(query) as copy:
+                 for data in copy:
+                     f.write(data)
+
+     def pg_to_pg(self, pg: "PG", source_table: str, target_table: str, cols: list[str] | None = None) -> None:
+         # Alphanumeric only, so the temporary file name is filesystem-safe
+         tmp_filename = generate_random_string(alphanum=True) + '.csv'
+         cols_str = ','.join([f'"{x}"' for x in cols]) if cols else '*'
+         try:
+             self.download_csv(f'SELECT {cols_str} FROM {source_table}', tmp_filename)
+             pg.upload_csv(tmp_filename, target_table)
+         finally:
+             if os.path.exists(tmp_filename):
+                 os.remove(tmp_filename)
+
+     def check_table_existence(self, table_name: str) -> None:
+         # The %s placeholder must not be quoted; psycopg handles the escaping
+         if not self.execute_query('''SELECT count(1) AS "cnt" FROM "information_schema"."tables" WHERE "table_schema" || '.' || "table_name" = %s;''', table_name).fetchone()['cnt']:
+             raise Exception(f'Target table \'{table_name}\' not created, please create it first!')
+
+     def upload_tuples(self, cols: list[str], tuples: list[tuple], table_name: str) -> None:
+         self.check_table_existence(table_name)
+
+         cols_str = ','.join([f'"{x}"' for x in cols])
+         query = f'''COPY {table_name}({cols_str}) FROM STDIN'''
+         logger.debug(f'🔎 Query:\n{query}')
+         with self.cursor.copy(query) as copy:
+             for row in tuples:
+                 copy.write_row(row)
+
+     def upload_list_of_dict(self, data: list[dict], table_name: str) -> None:
+         self.check_table_existence(table_name)
+
+         if len(data) == 0:
+             raise ValueError('No data to upload!')
+
+         cols = list(data[0].keys())
+         cols_str = ','.join([f'"{x}"' for x in cols])
+         query = f'''COPY {table_name}({cols_str}) FROM STDIN'''
+         logger.debug(f'🔎 Query:\n{query}')
+         with self.cursor.copy(query) as copy:
+             for row in data:
+                 copy.write_row(tuple(row[col] for col in cols))
+
+     def upload_csv(self, file_path: str, table_name: str) -> None:
+         self.check_table_existence(table_name)
+
+         file_path = os.path.expanduser(file_path)
+         # Read the header row to build the column list
+         with open(file_path, 'r') as f:
+             cols_str = ','.join([f'"{x}"' for x in next(csv.reader(f))])
+         query = dedent(
+             f'''
+             COPY {table_name}({cols_str})
+             FROM STDIN
+             DELIMITER ','
+             CSV HEADER;
+             '''
+         )
+         logger.debug(f'🔎 Query:\n{query}')
+         with open(file_path, 'r') as f:
+             with self.cursor.copy(query) as copy:
+                 while data := f.read(1024):
+                     copy.write(data)
+
+     def create_index(self, table_name: str, index: str | list[str], unique: bool = False) -> None:
+         try:
+             index = index if isinstance(index, list) else [index]
+             indexes = ','.join([f'"{x}"' for x in index])
+             self.execute_query(f'CREATE {"UNIQUE " if unique else ""}INDEX ON "{table_name}" ({indexes});')
+         except Exception:
+             self.rollback()
+             raise
+
+     def rollback(self):
+         self.conn.rollback()
+         logger.debug('🚫 Transaction rollback')
+
+     def commit(self):
+         self.conn.commit()
+         logger.debug('✅ Transaction commit')
+
+     def close(self):
+         self.cursor.close()
+         self.conn.close()
+         logger.debug('PG client close')
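
A minimal usage sketch for the PG class above, assuming PG_FILENAME points at a JSON file with a 'main' entry; the connection name, table, and file path here are hypothetical:

    from utill.my_pg import PG

    # Hypothetical config shape: {"main": {"host": "...", "port": 5432,
    #   "username": "...", "password": "...", "db": "..."}}
    with PG(connection='main') as pg:
        rows = pg.execute_query('SELECT * FROM "public"."users" LIMIT %s', 10).fetchall()
        pg.download_csv('SELECT * FROM "public"."users"', '~/users.csv')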
utill/my_queue.py ADDED
@@ -0,0 +1,66 @@
+ import queue
+ import concurrent.futures
+
+ from loguru import logger
+
+
+ class ThreadingQ:
+     def __init__(self) -> None:
+         self.q = queue.Queue()
+
+         self.producer_func = None
+         self.producer_args = None
+         self.consumer_func = None
+
+     def add_producer(self, func, *args):
+         self.producer_func = func
+         self.producer_args = args or []
+         return self
+
+     def add_consumer(self, func):
+         self.consumer_func = func
+         # The consumer's arguments are taken from the producer's output
+         return self
+
+     def execute(self):
+         if self.producer_func is None or self.producer_args is None or self.consumer_func is None:
+             raise Exception('Producer and consumer functions must be defined!')
+
+         def producer():
+             results = []
+
+             for item in self.producer_func(*self.producer_args):
+                 self.q.put(item)
+                 results.append(item)
+                 logger.debug(f'🌾 Produced {item}')
+
+             self.q.put(None)  # Sentinel value signalling the consumer to stop
+             return results
+
+         def consumer():
+             results = []
+
+             while True:
+                 item = self.q.get()
+                 if item is None:
+                     break
+
+                 # Each produced item is unpacked as the consumer's positional arguments
+                 result = self.consumer_func(*item)
+                 results.append(result)
+
+                 self.q.task_done()
+                 logger.debug(f'🔥 Consumed {item}')
+
+             return results
+
+         with concurrent.futures.ThreadPoolExecutor() as executor:
+             # Schedule the producer and consumer
+             self.future_producer = executor.submit(producer)
+             self.future_consumer = executor.submit(consumer)
+
+             producer_result = self.future_producer.result()
+             logger.debug('✅ Producer done')
+             consumer_result = self.future_consumer.result()
+             logger.debug('✅ Consumer done')
+
+         return producer_result, consumer_result
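
A short sketch of driving ThreadingQ; since execute() unpacks each queued item as the consumer's positional arguments, the producer should yield tuples (the functions here are illustrative):

    from utill.my_queue import ThreadingQ

    def produce(n):
        for i in range(n):
            yield (i, i * i)  # each tuple becomes the consumer's *args

    def consume(i, square):
        return f'{i}^2 = {square}'

    produced, consumed = ThreadingQ().add_producer(produce, 3).add_consumer(consume).execute()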
utill/my_string.py ADDED
@@ -0,0 +1,20 @@
+ import hashlib
+ import random
+ import re
+ import string
+
+
+ def generate_random_string(length: int = 4, alphanum: bool = False) -> str:
+     chars = string.ascii_letters + string.digits
+     if not alphanum:
+         chars += r'!@#$%^&*()-=_+[]{};\':",./<>?'
+     return ''.join(random.choice(chars) for _ in range(length))
+
+
+ def replace_nonnumeric(text: str, replace: str) -> str:
+     # Despite the name, this replaces every run of non-alphanumeric characters
+     return re.sub('[^0-9a-zA-Z]+', replace, text)
+
+
+ def mask(text: str, mask_length_min: int = 5, mask_length_max: int = 50, display_length: int = 5) -> str:
+     if not text:
+         mask_length = mask_length_min
+     else:
+         # Derive a deterministic mask length from the input so equal inputs mask identically
+         hash_value = int(hashlib.sha256(text.encode()).hexdigest(), 16)
+         mask_length = mask_length_min + (hash_value % (mask_length_max - mask_length_min + 1))
+
+     return ('*' * mask_length) + (text[(-display_length if len(text) > display_length else -1):])
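
Illustrative calls for the helpers above (the outputs shown are examples, not fixed values):

    from utill.my_string import generate_random_string, mask, replace_nonnumeric

    generate_random_string(8, alphanum=True)  # e.g. 'aZ3k9QxP'
    replace_nonnumeric('a-b c', '_')          # 'a_b_c'
    mask('secret-token')                      # '*' repeated 5-50 times (hash-derived) + 'token'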
utill/my_style.py ADDED
@@ -0,0 +1,39 @@
+ class Styles:
+     NONE = '\033[0m'
+     ITALIC = '\033[3m'
+     BOLD = '\033[1m'
+     UNDERLINE = '\033[4m'
+
+
+ class Colors:
+     HEADER = '\033[95m'
+     OKBLUE = '\033[94m'
+     OKCYAN = '\033[96m'
+     OKGREEN = '\033[92m'
+     WARNING = '\033[93m'
+     RED = '\033[91m'
+     BOLD = '\033[1m'
+     UNDERLINE = '\033[4m'
+
+
+ def make_style(styles_or_colors: list[str] | str, string: str) -> str:
+     if isinstance(styles_or_colors, list):
+         return ''.join(styles_or_colors) + string + Styles.NONE
+     return styles_or_colors + string + Styles.NONE
+
+
+ def bold(string: str) -> str:
+     return make_style(Styles.BOLD, string)
+
+
+ def italic(string: str) -> str:
+     return make_style(Styles.ITALIC, string)
+
+
+ def underline(string: str) -> str:
+     return make_style(Styles.UNDERLINE, string)
+
+
+ def color(string: str, color: str) -> str:
+     return make_style(color, string)
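
A quick sketch of composing these styles; the ANSI escapes render only on terminals that support them:

    from utill.my_style import bold, color, make_style, Colors, Styles

    print(bold('important'))
    print(color('all good', Colors.OKGREEN))
    print(make_style([Styles.BOLD, Colors.RED], 'bold red'))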
utill/my_tunnel.py ADDED
@@ -0,0 +1,42 @@
+ import socket
+
+ from loguru import logger
+ from sshtunnel import SSHTunnelForwarder
+
+ LOCALHOST = '127.0.0.1'
+
+
+ def _get_random_port() -> int:
+     # Bind to port 0 to let the OS pick a free ephemeral port, then release it
+     s = socket.socket()
+     s.bind((LOCALHOST, 0))
+     port = s.getsockname()[1]
+     s.close()
+     return port
+
+
+ def start_tunnel(host: str, port: int, user: str, key: str, target_host: str, target_port: int, local_port: int = None) -> tuple:
+     local_port = local_port or _get_random_port()
+
+     tunnel = SSHTunnelForwarder(
+         (host, port),
+         ssh_username=user,
+         ssh_private_key=key,
+         remote_bind_address=(target_host, target_port),
+         local_bind_address=(LOCALHOST, local_port),
+     )
+
+     tunnel.start()
+
+     return (tunnel, LOCALHOST, local_port)
+
+
+ def establish_tunnel(conf: dict, local_port: int = None) -> tuple:
+     using_tunnel = bool(conf.get('tunnel_host'))
+
+     if using_tunnel:
+         z = start_tunnel(conf['tunnel_host'], conf['tunnel_port'], conf['tunnel_username'], conf['tunnel_key'], conf['host'], conf['port'], local_port=local_port)
+         logger.debug(f'🛣️ Tunnel established: {conf["host"]}:{conf["port"]} --> {conf["tunnel_username"]}@{conf["tunnel_host"]} --> {z[1]}:{z[2]}')
+     else:
+         z = (None, conf['host'], conf['port'])
+
+     return z
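
A sketch of establish_tunnel with a hypothetical config; omitting 'tunnel_host' makes it a pass-through that returns the host and port unchanged:

    from utill.my_tunnel import establish_tunnel

    conf = {
        'host': 'db.internal', 'port': 5432,                      # hypothetical hosts
        'tunnel_host': 'bastion.example.com', 'tunnel_port': 22,
        'tunnel_username': 'deploy', 'tunnel_key': '~/.ssh/id_rsa',
    }
    tunnel, host, port = establish_tunnel(conf)  # connect to host:port; tunnel is None when no SSH hop is used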
utill/my_xlsx.py ADDED
@@ -0,0 +1,21 @@
+ import duckdb
+ import multiprocessing
+
+ from loguru import logger
+
+
+ def xlsx_to_csv(filename: str, sheet: str):
+     # Despite the name, this returns the sheet's rows as a list of tuples rather than writing a CSV file
+     con = duckdb.connect()
+     return con.execute('install spatial;')\
+         .execute('load spatial;')\
+         .execute(f'select * from st_read(\'{filename}\', layer=\'{sheet}\');')\
+         .fetchall()
+
+
+ def csv_to_xlsx(filename: str, output_file_path: str):
+     logger.info(f'Converting csv \'{filename}\' into xlsx \'{output_file_path}\' ...')
+     con = duckdb.connect()
+     # Wrap the CSV path in a subquery so COPY has a valid source relation
+     con.execute('install spatial;')\
+         .execute('load spatial;')\
+         .execute(f'set threads to {multiprocessing.cpu_count()};')\
+         .execute(f'copy (select * from \'{filename}\') to \'{output_file_path}\' with(format gdal, driver \'xlsx\')')
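
Illustrative calls, assuming the DuckDB spatial extension can be installed in the environment; the file names are hypothetical:

    from utill.my_xlsx import csv_to_xlsx, xlsx_to_csv

    rows = xlsx_to_csv('report.xlsx', sheet='Sheet1')  # returns rows despite the name
    csv_to_xlsx('data.csv', 'data.xlsx')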