rdxz2-utill 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of rdxz2-utill might be problematic.
- rdxz2_utill-1.0.0.dist-info/METADATA +168 -0
- rdxz2_utill-1.0.0.dist-info/RECORD +32 -0
- rdxz2_utill-1.0.0.dist-info/WHEEL +5 -0
- rdxz2_utill-1.0.0.dist-info/entry_points.txt +2 -0
- rdxz2_utill-1.0.0.dist-info/licenses/LICENSE +21 -0
- rdxz2_utill-1.0.0.dist-info/top_level.txt +1 -0
- utill/__init__.py +0 -0
- utill/cmd/__init__.py +0 -0
- utill/cmd/_bq.py +12 -0
- utill/cmd/_conf.py +62 -0
- utill/cmd/_enc.py +26 -0
- utill/cmd/_main.py +8 -0
- utill/cmd/_pg.py +15 -0
- utill/cmd/utill.py +90 -0
- utill/my_bq.py +358 -0
- utill/my_const.py +18 -0
- utill/my_csv.py +90 -0
- utill/my_datetime.py +63 -0
- utill/my_dict.py +12 -0
- utill/my_encryption.py +52 -0
- utill/my_env.py +66 -0
- utill/my_file.py +60 -0
- utill/my_gcs.py +117 -0
- utill/my_input.py +11 -0
- utill/my_json.py +62 -0
- utill/my_mb.py +375 -0
- utill/my_pg.py +159 -0
- utill/my_queue.py +66 -0
- utill/my_string.py +20 -0
- utill/my_style.py +39 -0
- utill/my_tunnel.py +42 -0
- utill/my_xlsx.py +21 -0
utill/my_pg.py
ADDED
@@ -0,0 +1,159 @@
import csv
import json
import os
import psycopg
import psycopg.rows

from loguru import logger
from textwrap import dedent

from .my_env import PG_FILENAME
from .my_string import generate_random_string
from .my_tunnel import establish_tunnel


class PG:
    def __init__(
        self,
        connection: str = None,
        config_source: str | dict = PG_FILENAME,
        autocommit: bool = True,
    ) -> None:
        if isinstance(config_source, str):
            if connection is None:
                raise ValueError('Connection name must be provided when using a file source!')
            with open(os.path.expanduser(config_source)) as f:
                conf = json.load(f)[connection]
        elif isinstance(config_source, dict):
            conf = config_source
        else:
            raise TypeError('config_source must be a file path or a dict!')
        (_, host, port) = establish_tunnel(conf)
        self.db_host = host
        self.db_port = port
        self.db_username = conf['username']
        self.db_password = conf['password']
        self.db_name = conf['db']
        self.conf = conf

        self.conn = None
        self.cursor = None
        self.establish_connection(autocommit)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.close()

    def establish_connection(self, autocommit: bool, row_factory: psycopg.rows.RowFactory = psycopg.rows.dict_row):
        self.conn = psycopg.connect(f'postgresql://{self.db_username}:{self.db_password}@{self.db_host}:{self.db_port}/{self.db_name}', autocommit=autocommit)
        self.cursor = self.conn.cursor(row_factory=row_factory)
        logger.debug(f'PG client open: {self.db_username}@{self.db_host}:{self.db_port}/{self.db_name}, autocommit={self.conn.autocommit}')

    def change_autocommit(self, autocommit: bool):
        if autocommit == self.conn.autocommit:
            return

        self.conn.autocommit = autocommit

    def execute_query(self, query: str, *params):
        # Reconnect if the connection has been closed
        if self.conn.closed:
            self.establish_connection(self.conn.autocommit)

        query = query.strip()
        logger.debug(f'🔎 Query:\n{query}')

        return self.cursor.execute(query, params)

    def download_csv(self, query: str, file_path: str) -> None:
        query = dedent(
            f'''
            COPY ({query})
            TO STDOUT
            WITH DELIMITER ','
            CSV HEADER;
            '''
        )
        logger.debug(f'🔎 Query:\n{query}')
        with open(os.path.expanduser(file_path), 'wb') as f:
            with self.cursor.copy(query) as copy:
                for data in copy:
                    f.write(data)

    def pg_to_pg(self, pg: 'PG', source_table: str, target_table: str, cols: list[str] = None) -> None:
        # Stream the source table through a temporary local CSV file
        tmp_filename = generate_random_string() + '.csv'
        cols_str = ','.join([f'"{x}"' for x in cols]) if cols else '*'
        try:
            self.download_csv(f'SELECT {cols_str} FROM {source_table}', tmp_filename)
            pg.upload_csv(tmp_filename, target_table)
        finally:
            if os.path.exists(tmp_filename):
                os.remove(tmp_filename)

    def check_table_existence(self, table_name: str) -> None:
        # The %s placeholder must not be quoted, otherwise psycopg sends it as a string literal
        if not self.execute_query('''SELECT count(1) AS "cnt" FROM "information_schema"."tables" WHERE "table_schema" || '.' || "table_name" = %s;''', table_name).fetchone()['cnt']:
            raise Exception(f'Target table \'{table_name}\' not created, please create it first!')

    def upload_tuples(self, cols: list[str], tuples: list[tuple], table_name: str) -> None:
        self.check_table_existence(table_name)

        cols_str = ','.join([f'"{x}"' for x in cols])
        query = f'COPY {table_name}({cols_str}) FROM STDIN'
        logger.debug(f'🔎 Query:\n{query}')
        with self.cursor.copy(query) as copy:
            for row in tuples:
                copy.write_row(row)

    def upload_list_of_dict(self, data: list[dict], table_name: str) -> None:
        self.check_table_existence(table_name)

        if len(data) == 0:
            raise ValueError('No data to upload!')

        # Column order is taken from the first row
        cols = data[0].keys()
        cols_str = ','.join([f'"{x}"' for x in cols])
        query = f'COPY {table_name}({cols_str}) FROM STDIN'
        logger.debug(f'🔎 Query:\n{query}')
        with self.cursor.copy(query) as copy:
            for row in data:
                copy.write_row(tuple(row[col] for col in cols))

    def upload_csv(self, file_path: str, table_name: str) -> None:
        self.check_table_existence(table_name)

        # Read the CSV header row to build the target column list
        with open(os.path.expanduser(file_path), 'r') as f:
            cols_str = ','.join([f'"{x}"' for x in next(csv.reader(f))])
        query = dedent(
            f'''
            COPY {table_name}({cols_str})
            FROM STDIN
            DELIMITER ','
            CSV HEADER;
            '''
        )
        logger.debug(f'🔎 Query:\n{query}')
        with open(os.path.expanduser(file_path), 'r') as f:
            with self.cursor.copy(query) as copy:
                while data := f.read(1024):
                    copy.write(data)

    def create_index(self, table_name: str, index: str | list[str], unique: bool = False) -> None:
        try:
            index = index if isinstance(index, list) else [index]
            indexes = ','.join([f'"{x}"' for x in index])
            self.execute_query(f'CREATE {"UNIQUE " if unique else ""}INDEX ON "{table_name}" ({indexes});')
        except Exception as e:
            self.rollback()
            raise e

    def rollback(self):
        self.conn.rollback()
        logger.debug('🚫 Transaction rollback')

    def commit(self):
        self.conn.commit()
        logger.debug('✅ Transaction commit')

    def close(self):
        self.cursor.close()
        self.conn.close()
        logger.debug('PG client close')
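For orientation, a minimal usage sketch of the PG client (not part of the package; the connection name 'analytics' and the table below are hypothetical, and PG_FILENAME must point at an existing JSON config):

from utill.my_pg import PG

with PG(connection='analytics') as pg:
    # execute_query returns the psycopg cursor; dict_row yields dicts
    rows = pg.execute_query('SELECT 1 AS "one";').fetchall()   # [{'one': 1}]
    # Export an arbitrary query to a local CSV via COPY ... TO STDOUT
    pg.download_csv('SELECT * FROM "public"."events"', '~/events.csv')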
utill/my_queue.py
ADDED
@@ -0,0 +1,66 @@
import queue
import concurrent.futures

from loguru import logger


class ThreadingQ:
    def __init__(self) -> None:
        self.q = queue.Queue()

        self.producer_func = None
        self.producer_args = None
        self.consumer_func = None

    def add_producer(self, func, *args):
        self.producer_func = func
        self.producer_args = args or []
        return self

    def add_consumer(self, func):
        self.consumer_func = func
        # The consumer's arguments come from the items the producer emits
        return self

    def execute(self):
        if not all([self.producer_func is not None, self.producer_args is not None, self.consumer_func is not None]):
            raise Exception('Producer and consumer functions must be defined!')

        def producer():
            results = []

            for item in self.producer_func(*self.producer_args):
                self.q.put(item)
                results.append(item)
                logger.debug(f'🌾 Produced {item}')

            self.q.put(None)  # sentinel marking the end of the stream
            return results

        def consumer():
            results = []

            while True:
                item = self.q.get()
                if item is None:
                    break

                result = self.consumer_func(*item)
                results.append(result)

                self.q.task_done()
                logger.debug(f'🔥 Consumed {item}')

            return results

        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Schedule the producer and consumer concurrently
            self.future_producer = executor.submit(producer)
            self.future_consumer = executor.submit(consumer)

            producer_result = self.future_producer.result()
            logger.debug('✅ Producer done')
            consumer_result = self.future_consumer.result()
            logger.debug('✅ Consumer done')

        return producer_result, consumer_result
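A minimal usage sketch of ThreadingQ (hypothetical producer and consumer; each produced item must be a tuple, because the consumer is invoked as consumer_func(*item)):

from utill.my_queue import ThreadingQ

def produce(n):
    for i in range(n):
        yield (i, i * i)   # each item is an argument tuple for the consumer

def consume(i, square):
    return i + square

produced, consumed = ThreadingQ().add_producer(produce, 3).add_consumer(consume).execute()
# produced == [(0, 0), (1, 1), (2, 4)], consumed == [0, 2, 6]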
utill/my_string.py
ADDED
@@ -0,0 +1,20 @@
import hashlib
import random
import re
import string


def generate_random_string(length: int = 4, alphanum: bool = False) -> str:
    # Letters and digits, plus punctuation unless alphanum=True
    chars = string.ascii_letters + string.digits + ('' if alphanum else r'!@#$%^&*()-=_+[]{};\':",./<>?')
    return ''.join(random.choice(chars) for _ in range(length))


def replace_nonnumeric(string: str, replace: str) -> str:
    # Despite the name, this replaces every non-alphanumeric character
    return re.sub('[^0-9a-zA-Z]+', replace, string)


def mask(string: str, mask_length_min: int = 5, mask_length_max: int = 50, display_length: int = 5) -> str:
    if not string:
        mask_length = mask_length_min
    else:
        # Derive a deterministic mask length from the input, so equal inputs mask identically
        hash_value = int(hashlib.sha256(string.encode()).hexdigest(), 16)
        mask_length = mask_length_min + (hash_value % (mask_length_max - mask_length_min + 1))

    return ('*' * mask_length) + (string[(-display_length if len(string) > display_length else -1):])
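A few illustrative calls (the random output is only an example):

from utill.my_string import generate_random_string, mask, replace_nonnumeric

generate_random_string(8, alphanum=True)   # e.g. 'aB3k9QzX'
replace_nonnumeric('2024-01-02', '_')      # '2024_01_02'
mask('super-secret-password')              # hash-derived run of '*' plus the last 5 chars, 'sword'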
utill/my_style.py
ADDED
@@ -0,0 +1,39 @@
class Styles:
    NONE = '\033[0m'
    ITALIC = '\033[3m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


class Colors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


def make_style(styles_or_colors: list[str] | str, string: str) -> str:
    # Styles and Colors attributes are plain ANSI escape strings; a list is concatenated
    if isinstance(styles_or_colors, list):
        return ''.join(styles_or_colors) + string + Styles.NONE
    else:
        return styles_or_colors + string + Styles.NONE


def bold(string: str) -> str:
    return make_style(Styles.BOLD, string)


def italic(string: str) -> str:
    return make_style(Styles.ITALIC, string)


def underline(string: str) -> str:
    return make_style(Styles.UNDERLINE, string)


def color(string: str, color: str) -> str:
    return make_style(color, string)
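A short illustrative snippet (hypothetical strings; styles compose by passing a list):

from utill.my_style import Colors, Styles, bold, make_style

print(bold('important'))                                # bold text
print(make_style([Styles.BOLD, Colors.RED], 'error'))   # bold red text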
utill/my_tunnel.py
ADDED
@@ -0,0 +1,42 @@
import socket

from loguru import logger
from sshtunnel import SSHTunnelForwarder

LOCALHOST = '127.0.0.1'


def _get_random_port() -> int:
    # Bind to port 0 so the OS picks a free port, then release it for the tunnel to use
    with socket.socket() as s:
        s.bind((LOCALHOST, 0))
        return s.getsockname()[1]


def start_tunnel(host: str, port: int, user: str, key: str, target_host: str, target_port: int, local_port: int = None) -> tuple:
    local_port = local_port or _get_random_port()

    tunnel = SSHTunnelForwarder(
        (host, port),
        ssh_username=user,
        ssh_private_key=key,
        remote_bind_address=(target_host, target_port),
        local_bind_address=(LOCALHOST, local_port),
    )

    tunnel.start()

    return (tunnel, LOCALHOST, local_port)


def establish_tunnel(conf: dict, local_port: int = None) -> tuple:
    using_tunnel = bool(conf.get('tunnel_host'))
    local_host = LOCALHOST if using_tunnel else conf['host']

    z = start_tunnel(conf['tunnel_host'], conf['tunnel_port'], conf['tunnel_username'], conf['tunnel_key'], conf['host'], conf['port'], local_port=local_port) \
        if using_tunnel \
        else (None, local_host, conf['port'])

    if using_tunnel:
        logger.debug(f'🛣️ Tunnel established: {conf["host"]}:{conf["port"]} --> {conf["tunnel_username"]}@{conf["tunnel_host"]} --> {z[1]}:{z[2]}')

    return z
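A sketch of the conf shape establish_tunnel() expects, inferred from the keys it reads (hostnames, user, and key path are made up; omit the tunnel_* keys to connect directly):

from utill.my_tunnel import establish_tunnel

conf = {
    'host': 'db.internal', 'port': 5432,
    'tunnel_host': 'bastion.example.com', 'tunnel_port': 22,
    'tunnel_username': 'deploy', 'tunnel_key': '~/.ssh/id_rsa',
}
tunnel, host, port = establish_tunnel(conf)   # then connect to host:port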
utill/my_xlsx.py
ADDED
@@ -0,0 +1,21 @@
import duckdb
import multiprocessing

from loguru import logger


def xlsx_to_csv(filename: str, sheet: str):
    con = duckdb.connect()
    return con.execute('install spatial;')\
        .execute('load spatial;')\
        .execute(f'select * from st_read(\'{filename}\', layer=\'{sheet}\');')\
        .fetchall()


def csv_to_xlsx(filename: str, output_file_path: str):
    logger.info(f'Converting csv \'{filename}\' into xlsx \'{output_file_path}\' ...')
    con = duckdb.connect()
    con.execute('install spatial;')\
        .execute('load spatial;')\
        .execute(f'set threads to {multiprocessing.cpu_count()};')\
        .execute(f'copy \'{filename}\' to \'{output_file_path}\' with(format gdal, driver \'xlsx\')')