rdxz2-utill 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rdxz2-utill might be problematic. Click here for more details.
- {rdxz2_utill-0.1.2.dist-info → rdxz2_utill-0.1.4.dist-info}/METADATA +2 -1
- rdxz2_utill-0.1.4.dist-info/RECORD +37 -0
- utill/cmd/_bq.py +16 -3
- utill/cmd/_conf.py +15 -15
- utill/cmd/_enc.py +8 -4
- utill/cmd/_mb.py +140 -0
- utill/cmd/_pg.py +4 -2
- utill/cmd/utill.py +203 -61
- utill/my_bq.py +287 -162
- utill/my_compare.py +1 -1
- utill/my_const.py +11 -8
- utill/my_csv.py +31 -15
- utill/my_datetime.py +21 -10
- utill/my_encryption.py +31 -13
- utill/my_env.py +22 -13
- utill/my_file.py +15 -13
- utill/my_gcs.py +40 -16
- utill/my_gdrive.py +195 -0
- utill/my_input.py +8 -4
- utill/my_json.py +6 -6
- utill/my_mb.py +351 -357
- utill/my_pg.py +76 -46
- utill/my_queue.py +37 -24
- utill/my_string.py +23 -5
- utill/my_style.py +18 -16
- utill/my_tunnel.py +29 -9
- utill/my_xlsx.py +11 -8
- rdxz2_utill-0.1.2.dist-info/RECORD +0 -35
- {rdxz2_utill-0.1.2.dist-info → rdxz2_utill-0.1.4.dist-info}/WHEEL +0 -0
- {rdxz2_utill-0.1.2.dist-info → rdxz2_utill-0.1.4.dist-info}/entry_points.txt +0 -0
- {rdxz2_utill-0.1.2.dist-info → rdxz2_utill-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {rdxz2_utill-0.1.2.dist-info → rdxz2_utill-0.1.4.dist-info}/top_level.txt +0 -0
utill/my_compare.py
CHANGED
|
@@ -29,6 +29,6 @@ def same(a, b, float_precision=None) -> tuple[bool, float]:
|
|
|
29
29
|
|
|
30
30
|
return a_float == b_float, abs(a_float - b_float)
|
|
31
31
|
except (ValueError, TypeError):
|
|
32
|
-
raise Exception(f
|
|
32
|
+
raise Exception(f"Can't compare {a} to {b}")
|
|
33
33
|
|
|
34
34
|
return str(a) == str(b), None
|
utill/my_const.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from enum import
|
|
1
|
+
from enum import StrEnum
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class ByteSize:
|
|
@@ -8,11 +8,14 @@ class ByteSize:
|
|
|
8
8
|
TB = 1_099_511_627_776
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
GET = 1
|
|
13
|
-
POST = 2
|
|
14
|
-
PUT = 3
|
|
15
|
-
DELETE = 4
|
|
11
|
+
from enum import StrEnum
|
|
16
12
|
|
|
17
|
-
|
|
18
|
-
|
|
13
|
+
|
|
14
|
+
class HttpMethod(StrEnum):
|
|
15
|
+
GET = "GET"
|
|
16
|
+
POST = "POST"
|
|
17
|
+
PUT = "PUT"
|
|
18
|
+
DELETE = "DELETE"
|
|
19
|
+
PATCH = "PATCH"
|
|
20
|
+
HEAD = "HEAD"
|
|
21
|
+
OPTIONS = "OPTIONS"
|
utill/my_csv.py
CHANGED
|
@@ -11,25 +11,32 @@ from .my_file import decompress
|
|
|
11
11
|
|
|
12
12
|
def read_header(filename: str):
|
|
13
13
|
filename = os.path.expanduser(filename)
|
|
14
|
-
with open(filename,
|
|
14
|
+
with open(filename, "r") as f:
|
|
15
15
|
csvreader = csv.reader(f)
|
|
16
16
|
return next(csvreader)
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def write(filename: str, rows: list[tuple], append: bool = False):
|
|
20
20
|
filename = os.path.expanduser(filename)
|
|
21
|
-
with open(filename,
|
|
21
|
+
with open(filename, "a" if append else "w") as f:
|
|
22
22
|
csvwriter = csv.writer(f)
|
|
23
23
|
csvwriter.writerows(rows)
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
def compress(
|
|
26
|
+
def compress(
|
|
27
|
+
src_filename: str,
|
|
28
|
+
keep: bool = False,
|
|
29
|
+
max_size_bytes=ByteSize.GB,
|
|
30
|
+
src_fopen=None,
|
|
31
|
+
header=None,
|
|
32
|
+
file_count=1,
|
|
33
|
+
):
|
|
27
34
|
src_filename = os.path.expanduser(src_filename)
|
|
28
35
|
current_size = 0
|
|
29
36
|
dst_filename = f'{src_filename}_part{str(file_count).rjust(6, "0")}.gz'
|
|
30
37
|
os.remove(dst_filename) if os.path.exists(dst_filename) else None
|
|
31
|
-
logger.debug(f
|
|
32
|
-
gz = gzip.open(dst_filename,
|
|
38
|
+
logger.debug(f"📄 Compress csv {src_filename} --> {dst_filename}")
|
|
39
|
+
gz = gzip.open(dst_filename, "wt")
|
|
33
40
|
|
|
34
41
|
src_fopen = src_fopen or open(src_filename)
|
|
35
42
|
header = header or src_fopen.readline()
|
|
@@ -42,14 +49,16 @@ def compress(src_filename: str, keep: bool = False, max_size_bytes=ByteSize.GB,
|
|
|
42
49
|
break
|
|
43
50
|
|
|
44
51
|
gz.write(line)
|
|
45
|
-
current_size += len(line.encode(
|
|
52
|
+
current_size += len(line.encode("utf-8"))
|
|
46
53
|
|
|
47
54
|
if current_size >= max_size_bytes:
|
|
48
55
|
gz.close()
|
|
49
56
|
yield dst_filename
|
|
50
57
|
|
|
51
58
|
file_count += 1
|
|
52
|
-
yield from compress(
|
|
59
|
+
yield from compress(
|
|
60
|
+
src_filename, keep, max_size_bytes, src_fopen, header, file_count
|
|
61
|
+
)
|
|
53
62
|
return
|
|
54
63
|
|
|
55
64
|
gz.close()
|
|
@@ -57,14 +66,21 @@ def compress(src_filename: str, keep: bool = False, max_size_bytes=ByteSize.GB,
|
|
|
57
66
|
yield dst_filename
|
|
58
67
|
|
|
59
68
|
|
|
60
|
-
def combine(
|
|
61
|
-
|
|
69
|
+
def combine(
|
|
70
|
+
src_filenames: list[str],
|
|
71
|
+
dst_filename: str,
|
|
72
|
+
gzip: bool = False,
|
|
73
|
+
delete: bool = False,
|
|
74
|
+
) -> None:
|
|
75
|
+
csv.field_size_limit(
|
|
76
|
+
min(sys.maxsize, 2147483646)
|
|
77
|
+
) # FIX: _csv.Error: field larger than field limit (131072)
|
|
62
78
|
|
|
63
|
-
if not dst_filename.endswith(
|
|
64
|
-
raise ValueError(
|
|
79
|
+
if not dst_filename.endswith(".csv"):
|
|
80
|
+
raise ValueError("Output filename must ends with '.csv'!")
|
|
65
81
|
|
|
66
82
|
first_src_file = True
|
|
67
|
-
with open(dst_filename,
|
|
83
|
+
with open(dst_filename, "w") as fout:
|
|
68
84
|
csvwriter = csv.writer(fout)
|
|
69
85
|
|
|
70
86
|
for src_filename in src_filenames:
|
|
@@ -75,7 +91,7 @@ def combine(src_filenames: list[str], dst_filename: str, gzip: bool = False, del
|
|
|
75
91
|
src_filename = decompress(src_filename)
|
|
76
92
|
|
|
77
93
|
# Write content into file
|
|
78
|
-
with open(src_filename,
|
|
94
|
+
with open(src_filename, "r") as fin:
|
|
79
95
|
csvreader = csv.reader(fin)
|
|
80
96
|
|
|
81
97
|
# Write header only at first file
|
|
@@ -88,8 +104,8 @@ def combine(src_filenames: list[str], dst_filename: str, gzip: bool = False, del
|
|
|
88
104
|
# Write body
|
|
89
105
|
[csvwriter.writerow(row) for row in csvreader]
|
|
90
106
|
|
|
91
|
-
logger.debug(f
|
|
107
|
+
logger.debug(f"Combine {src_filename}")
|
|
92
108
|
|
|
93
109
|
if delete:
|
|
94
110
|
os.remove(src_filename)
|
|
95
|
-
logger.debug(f
|
|
111
|
+
logger.debug(f"Delete {src_filename}")
|
utill/my_datetime.py
CHANGED
|
@@ -8,28 +8,35 @@ class Level(Enum):
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def get_current_date_str(use_separator: bool = False) -> str:
|
|
11
|
-
return datetime.now().strftime(
|
|
11
|
+
return datetime.now().strftime("%Y-%m-%d" if use_separator else "%Y%m%d")
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def get_current_datetime_str(use_separator: bool = False) -> str:
|
|
15
|
-
return datetime.now().strftime(
|
|
15
|
+
return datetime.now().strftime(
|
|
16
|
+
"%Y-%m-%d %H:%M:%S" if use_separator else "%Y%m%d%H%M%S"
|
|
17
|
+
)
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
def get_month_first_and_last_day(string: str) -> tuple:
|
|
19
21
|
try:
|
|
20
|
-
dt = datetime.strptime(string,
|
|
22
|
+
dt = datetime.strptime(string, "%Y-%m")
|
|
21
23
|
except ValueError:
|
|
22
|
-
dt = datetime.strptime(string,
|
|
24
|
+
dt = datetime.strptime(string, "%Y-%m-%d").replace(day=1)
|
|
23
25
|
|
|
24
26
|
return (dt, (dt + timedelta(days=32)).replace(day=1) - timedelta(days=1))
|
|
25
27
|
|
|
26
28
|
|
|
27
|
-
def generate_dates(
|
|
29
|
+
def generate_dates(
|
|
30
|
+
start_date: date | str,
|
|
31
|
+
end_date: date | str,
|
|
32
|
+
level: Level,
|
|
33
|
+
is_output_strings: bool = False,
|
|
34
|
+
):
|
|
28
35
|
# Auto convert strings
|
|
29
36
|
if type(start_date) == str:
|
|
30
|
-
start_date = datetime.strptime(start_date,
|
|
37
|
+
start_date = datetime.strptime(start_date, "%Y-%m-%d").date()
|
|
31
38
|
if type(end_date) == str:
|
|
32
|
-
end_date = datetime.strptime(end_date,
|
|
39
|
+
end_date = datetime.strptime(end_date, "%Y-%m-%d").date()
|
|
33
40
|
|
|
34
41
|
# Auto convert datetime
|
|
35
42
|
if type(start_date) == datetime:
|
|
@@ -38,7 +45,9 @@ def generate_dates(start_date: date | str, end_date: date | str, level: Level, i
|
|
|
38
45
|
end_date = end_date.date()
|
|
39
46
|
|
|
40
47
|
if start_date > end_date:
|
|
41
|
-
raise ValueError(
|
|
48
|
+
raise ValueError(
|
|
49
|
+
f"start_date '{start_date}' cannot be larger than end_date '{end_date}'"
|
|
50
|
+
)
|
|
42
51
|
|
|
43
52
|
dates: list[date] = []
|
|
44
53
|
|
|
@@ -55,9 +64,11 @@ def generate_dates(start_date: date | str, end_date: date | str, level: Level, i
|
|
|
55
64
|
dates.append(end_date)
|
|
56
65
|
end_date = end_date - timedelta(days=1)
|
|
57
66
|
case _:
|
|
58
|
-
raise ValueError(
|
|
67
|
+
raise ValueError(
|
|
68
|
+
f"level '{level}' not recognized. available levels are: 'day', 'month'"
|
|
69
|
+
)
|
|
59
70
|
|
|
60
71
|
if is_output_strings:
|
|
61
|
-
return sorted([date.strftime(
|
|
72
|
+
return sorted([date.strftime("%Y-%m-%d") for date in dates])
|
|
62
73
|
else:
|
|
63
74
|
return sorted(dates)
|
utill/my_encryption.py
CHANGED
|
@@ -5,16 +5,26 @@ from loguru import logger
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
def __fernet_encrypt_or_decrypt(encrypt: bool, string: str, password: str):
|
|
8
|
-
return
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
8
|
+
return (
|
|
9
|
+
Fernet(password).encrypt(string.encode())
|
|
10
|
+
if encrypt
|
|
11
|
+
else Fernet(password).encrypt(string.encode())
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def __file_encrypt_or_decrypt(
|
|
16
|
+
encrypt: bool,
|
|
17
|
+
src_filename: str,
|
|
18
|
+
password: str,
|
|
19
|
+
dst_filename: str = None,
|
|
20
|
+
overwrite: bool = False,
|
|
21
|
+
):
|
|
12
22
|
src_filename = os.path.expanduser(src_filename)
|
|
13
23
|
|
|
14
24
|
if not os.path.exists(src_filename):
|
|
15
|
-
return ValueError(f
|
|
25
|
+
return ValueError(f"Source file not exists: {src_filename}")
|
|
16
26
|
|
|
17
|
-
with open(src_filename,
|
|
27
|
+
with open(src_filename, "r") as fr:
|
|
18
28
|
# If destination file is not specified, return the encrypted string
|
|
19
29
|
if not dst_filename:
|
|
20
30
|
return __fernet_encrypt_or_decrypt(encrypt, fr.read(), password)
|
|
@@ -25,23 +35,31 @@ def __file_encrypt_or_decrypt(encrypt: bool, src_filename: str, password: str, d
|
|
|
25
35
|
# Destination file exists checker
|
|
26
36
|
if os.path.exists(dst_filename):
|
|
27
37
|
if overwrite:
|
|
28
|
-
return ValueError(f
|
|
38
|
+
return ValueError(f"Destination file exists: {dst_filename}")
|
|
29
39
|
else:
|
|
30
40
|
os.remove(dst_filename)
|
|
31
41
|
|
|
32
|
-
with open(dst_filename,
|
|
42
|
+
with open(dst_filename, "w") as fw:
|
|
33
43
|
fw.write(__fernet_encrypt_or_decrypt(encrypt, fr.read()), password)
|
|
34
44
|
|
|
35
|
-
logger.info(f
|
|
45
|
+
logger.info(f"Encrypted into {dst_filename}")
|
|
36
46
|
return dst_filename
|
|
37
47
|
|
|
38
48
|
|
|
39
|
-
def encrypt_file(
|
|
40
|
-
|
|
49
|
+
def encrypt_file(
|
|
50
|
+
src_filename: str, password: str, dst_filename: str = None, overwrite: bool = False
|
|
51
|
+
) -> str:
|
|
52
|
+
return __file_encrypt_or_decrypt(
|
|
53
|
+
True, src_filename, password, dst_filename, overwrite
|
|
54
|
+
)
|
|
41
55
|
|
|
42
56
|
|
|
43
|
-
def decrypt_file(
|
|
44
|
-
|
|
57
|
+
def decrypt_file(
|
|
58
|
+
src_filename: str, password: str, dst_filename: str = None, overwrite: bool = False
|
|
59
|
+
) -> str:
|
|
60
|
+
return __file_encrypt_or_decrypt(
|
|
61
|
+
False, src_filename, password, dst_filename, overwrite
|
|
62
|
+
)
|
|
45
63
|
|
|
46
64
|
|
|
47
65
|
def encrypt_string(string: str, password: str) -> str:
|
utill/my_env.py
CHANGED
|
@@ -7,12 +7,16 @@ from typing import Optional
|
|
|
7
7
|
|
|
8
8
|
from .my_input import ask_yes_no
|
|
9
9
|
|
|
10
|
-
ENV_DIR = os.path.expanduser(os.path.join(
|
|
11
|
-
ENV_FILE = os.path.join(ENV_DIR,
|
|
10
|
+
ENV_DIR = os.path.expanduser(os.path.join("~", ".utill"))
|
|
11
|
+
ENV_FILE = os.path.join(ENV_DIR, "env")
|
|
12
12
|
|
|
13
|
-
TEMPLATE_DIR =
|
|
14
|
-
TEMPLATE_PG_FILENAME = os.path.join(
|
|
15
|
-
|
|
13
|
+
TEMPLATE_DIR = "templates"
|
|
14
|
+
TEMPLATE_PG_FILENAME = os.path.join(
|
|
15
|
+
os.path.dirname(__file__), TEMPLATE_DIR, "pg.json"
|
|
16
|
+
) # PostgreSQL connections
|
|
17
|
+
TEMPLATE_MB_FILENAME = os.path.join(
|
|
18
|
+
os.path.dirname(__file__), TEMPLATE_DIR, "mb.json"
|
|
19
|
+
) # Metabase connections
|
|
16
20
|
|
|
17
21
|
PG_FILENAME = os.path.join(ENV_DIR, os.path.basename(TEMPLATE_PG_FILENAME))
|
|
18
22
|
MB_FILENAME = os.path.join(ENV_DIR, os.path.basename(TEMPLATE_MB_FILENAME))
|
|
@@ -24,26 +28,26 @@ if not os.path.exists(ENV_DIR):
|
|
|
24
28
|
|
|
25
29
|
def init_pg_file():
|
|
26
30
|
if os.path.exists(PG_FILENAME):
|
|
27
|
-
if ask_yes_no(f
|
|
31
|
+
if ask_yes_no(f"PostgreSQL connection file exists: {PG_FILENAME}, overwrite?"):
|
|
28
32
|
shutil.copy(TEMPLATE_PG_FILENAME, PG_FILENAME)
|
|
29
|
-
logger.warning(f
|
|
33
|
+
logger.warning(f"PostgreSQL connection file overwritten! {PG_FILENAME}")
|
|
30
34
|
else:
|
|
31
35
|
return
|
|
32
36
|
|
|
33
37
|
shutil.copy(TEMPLATE_PG_FILENAME, PG_FILENAME)
|
|
34
|
-
logger.info(f
|
|
38
|
+
logger.info(f"PostgreSQL connection file created: {PG_FILENAME}")
|
|
35
39
|
|
|
36
40
|
|
|
37
41
|
def init_mb_file():
|
|
38
42
|
if os.path.exists(MB_FILENAME):
|
|
39
|
-
if ask_yes_no(f
|
|
43
|
+
if ask_yes_no(f"Metabase connection file exists: {MB_FILENAME}, overwrite?"):
|
|
40
44
|
shutil.copy(TEMPLATE_MB_FILENAME, MB_FILENAME)
|
|
41
|
-
logger.warning(f
|
|
45
|
+
logger.warning(f"Metabase connection file overwritten! {MB_FILENAME}")
|
|
42
46
|
else:
|
|
43
47
|
return
|
|
44
48
|
|
|
45
49
|
shutil.copy(TEMPLATE_MB_FILENAME, MB_FILENAME)
|
|
46
|
-
logger.info(f
|
|
50
|
+
logger.info(f"Metabase connection file created: {MB_FILENAME}")
|
|
47
51
|
|
|
48
52
|
|
|
49
53
|
class Envs(BaseSettings):
|
|
@@ -56,8 +60,13 @@ class Envs(BaseSettings):
|
|
|
56
60
|
setattr(self, k, v)
|
|
57
61
|
|
|
58
62
|
def write(self):
|
|
59
|
-
with open(ENV_FILE,
|
|
60
|
-
data =
|
|
63
|
+
with open(ENV_FILE, "w") as f:
|
|
64
|
+
data = "\n".join(
|
|
65
|
+
[
|
|
66
|
+
'{}="{}"'.format(k, str(getattr(self, k)).replace('"', '\\"'))
|
|
67
|
+
for k in self.model_fields.keys()
|
|
68
|
+
]
|
|
69
|
+
)
|
|
61
70
|
f.write(data)
|
|
62
71
|
|
|
63
72
|
class Config:
|
utill/my_file.py
CHANGED
|
@@ -7,12 +7,12 @@ from loguru import logger
|
|
|
7
7
|
|
|
8
8
|
def compress(src_file: str, keep: bool = False):
|
|
9
9
|
src_file = os.path.expanduser(src_file)
|
|
10
|
-
dst_file = src_file +
|
|
10
|
+
dst_file = src_file + ".gz"
|
|
11
11
|
|
|
12
12
|
os.remove(dst_file) if os.path.exists(dst_file) else None
|
|
13
|
-
logger.debug(f
|
|
14
|
-
with open(src_file,
|
|
15
|
-
with gzip.open(dst_file,
|
|
13
|
+
logger.debug(f"📄 Compress {dst_file} --> {dst_file}")
|
|
14
|
+
with open(src_file, "rb") as f_in:
|
|
15
|
+
with gzip.open(dst_file, "wb") as f_out:
|
|
16
16
|
shutil.copyfileobj(f_in, f_out)
|
|
17
17
|
|
|
18
18
|
os.remove(src_file) if not keep else None
|
|
@@ -21,16 +21,16 @@ def compress(src_file: str, keep: bool = False):
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def decompress(src_file: str, keep: bool = False):
|
|
24
|
-
if not src_file.endswith(
|
|
25
|
-
raise ValueError(
|
|
24
|
+
if not src_file.endswith(".gz"):
|
|
25
|
+
raise ValueError("File name not ends with .gz!")
|
|
26
26
|
|
|
27
27
|
src_file = os.path.expanduser(src_file)
|
|
28
|
-
dst_file = src_file.removesuffix(
|
|
28
|
+
dst_file = src_file.removesuffix(".gz")
|
|
29
29
|
|
|
30
30
|
os.remove(dst_file) if os.path.exists(dst_file) else None
|
|
31
|
-
logger.debug(f
|
|
32
|
-
with gzip.open(src_file,
|
|
33
|
-
with open(dst_file,
|
|
31
|
+
logger.debug(f"Decompress {src_file} to {dst_file}")
|
|
32
|
+
with gzip.open(src_file, "rb") as f_in:
|
|
33
|
+
with open(dst_file, "wb") as f_out:
|
|
34
34
|
shutil.copyfileobj(f_in, f_out)
|
|
35
35
|
|
|
36
36
|
os.remove(src_file) if not keep else None
|
|
@@ -45,15 +45,17 @@ def make_sure_directory_exists(dirname: str):
|
|
|
45
45
|
|
|
46
46
|
def make_sure_path_is_directory(path: str):
|
|
47
47
|
if not path.endswith(os.sep):
|
|
48
|
-
raise ValueError(
|
|
48
|
+
raise ValueError(
|
|
49
|
+
f"Please specify directory name ending with '{os.sep}' character, example for Linux: '/home/my_username/Downloads/my_folder/'!"
|
|
50
|
+
)
|
|
49
51
|
|
|
50
52
|
|
|
51
53
|
def read_last_line(filename: str) -> str:
|
|
52
54
|
filename = os.path.expanduser(filename)
|
|
53
|
-
with open(filename,
|
|
55
|
+
with open(filename, "rb") as f:
|
|
54
56
|
try: # Catch OSError in case of a one line file
|
|
55
57
|
f.seek(-2, os.SEEK_END)
|
|
56
|
-
while f.read(1) != b
|
|
58
|
+
while f.read(1) != b"\n":
|
|
57
59
|
f.seek(-2, os.SEEK_CUR)
|
|
58
60
|
except OSError:
|
|
59
61
|
f.seek(0)
|
utill/my_gcs.py
CHANGED
|
@@ -9,14 +9,18 @@ class GCS:
|
|
|
9
9
|
|
|
10
10
|
def __init__(self, bucket: str | None = None, project_id: str | None = None):
|
|
11
11
|
if project_id is None and envs.GCP_PROJECT_ID is None:
|
|
12
|
-
logger.warning(
|
|
12
|
+
logger.warning("Using ADC for GCS authentication")
|
|
13
13
|
|
|
14
14
|
if bucket is None and envs.GCS_BUCKET is None:
|
|
15
|
-
raise ValueError(
|
|
15
|
+
raise ValueError(
|
|
16
|
+
"Bucket name must be provided either as an argument or set in environment variables."
|
|
17
|
+
)
|
|
16
18
|
|
|
17
19
|
self.client = storage.Client(project=project_id or envs.GCP_PROJECT_ID)
|
|
18
20
|
self.bucket = self.client.bucket(bucket or envs.GCS_BUCKET)
|
|
19
|
-
logger.debug(
|
|
21
|
+
logger.debug(
|
|
22
|
+
f"GCS client open, project: {self.client.project}, bucket: {self.bucket.name}"
|
|
23
|
+
)
|
|
20
24
|
|
|
21
25
|
def get_blob(self, blobpath: str) -> storage.Blob:
|
|
22
26
|
return self.bucket.blob(blobpath)
|
|
@@ -28,7 +32,13 @@ class GCS:
|
|
|
28
32
|
blob = self.get_blob(blobpath) if isinstance(blobpath, str) else blobpath
|
|
29
33
|
return blob.delete()
|
|
30
34
|
|
|
31
|
-
def copy(
|
|
35
|
+
def copy(
|
|
36
|
+
self,
|
|
37
|
+
src_blobpath: str,
|
|
38
|
+
dst_blobpath: str,
|
|
39
|
+
dst_bucket: str = None,
|
|
40
|
+
move: bool = False,
|
|
41
|
+
):
|
|
32
42
|
src_bucket = self.bucket
|
|
33
43
|
src_blob = self.get_blob(src_blobpath)
|
|
34
44
|
dst_bucket = dst_bucket or src_bucket.name
|
|
@@ -38,10 +48,14 @@ class GCS:
|
|
|
38
48
|
# Move mode
|
|
39
49
|
if move:
|
|
40
50
|
self.delete_blob(src_blobpath)
|
|
41
|
-
logger.debug(
|
|
51
|
+
logger.debug(
|
|
52
|
+
f"Moved gs://{src_bucket}/{src_blobpath} to gs://{dst_bucket}/{dst_blobpath}"
|
|
53
|
+
)
|
|
42
54
|
# Copy mode
|
|
43
55
|
else:
|
|
44
|
-
logger.debug(
|
|
56
|
+
logger.debug(
|
|
57
|
+
f"Copied gs://{src_bucket}/{src_blobpath} to gs://{dst_bucket}/{dst_blobpath}"
|
|
58
|
+
)
|
|
45
59
|
|
|
46
60
|
def upload(self, src_filepath: str, dst_blobpath: str, move: bool = False):
|
|
47
61
|
blob = self.get_blob(dst_blobpath)
|
|
@@ -50,30 +64,40 @@ class GCS:
|
|
|
50
64
|
# Move mode
|
|
51
65
|
if move:
|
|
52
66
|
os.remove(src_filepath)
|
|
53
|
-
logger.debug(f
|
|
67
|
+
logger.debug(f"Moved {src_filepath} to gs://{self.bucket.name}/{blob.name}")
|
|
54
68
|
# Copy mode
|
|
55
69
|
else:
|
|
56
|
-
logger.debug(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
70
|
+
logger.debug(
|
|
71
|
+
f"Uploaded {src_filepath} to gs://{self.bucket.name}/{blob.name}"
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
def download(
|
|
75
|
+
self, src_blobpath: str | storage.Blob, dst_filepath: str, move: bool = False
|
|
76
|
+
):
|
|
77
|
+
blob = (
|
|
78
|
+
self.get_blob(src_blobpath)
|
|
79
|
+
if isinstance(src_blobpath, str)
|
|
80
|
+
else src_blobpath
|
|
81
|
+
)
|
|
60
82
|
blob.download_to_filename(dst_filepath)
|
|
61
83
|
|
|
62
84
|
if move:
|
|
63
85
|
self.delete_blob(blob)
|
|
64
|
-
logger.debug(f
|
|
86
|
+
logger.debug(f"Moved gs://{self.bucket.name}/{blob.name} to {dst_filepath}")
|
|
65
87
|
else:
|
|
66
|
-
logger.debug(
|
|
88
|
+
logger.debug(
|
|
89
|
+
f"Copied gs://{self.bucket.name}/{blob.name} to {dst_filepath}"
|
|
90
|
+
)
|
|
67
91
|
|
|
68
92
|
# MARK: Utilities
|
|
69
93
|
|
|
70
94
|
@staticmethod
|
|
71
|
-
def build_tmp_dirpath(prefix: str =
|
|
95
|
+
def build_tmp_dirpath(prefix: str = "tmp") -> str:
|
|
72
96
|
"""
|
|
73
97
|
Builds a temporary directory path in the GCS bucket.
|
|
74
98
|
"""
|
|
75
|
-
return f
|
|
99
|
+
return f"{prefix}/{get_current_datetime_str()}"
|
|
76
100
|
|
|
77
101
|
def close(self):
|
|
78
102
|
self.client.close()
|
|
79
|
-
logger.debug(
|
|
103
|
+
logger.debug("GCS client closed")
|