rdxz2-utill 0.0.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
utill/my_gcs.py CHANGED
@@ -1,117 +1,105 @@
 import os
-import re
 
 from google.cloud import storage
 from loguru import logger
 
+from .my_datetime import get_current_datetime_str
 from .my_env import envs
+from .my_string import generate_random_string
 
 
 class GCS:
+    def __init__(self, bucket: str | None = None, project_id: str | None = None):
+        if project_id is None and envs.GCP_PROJECT_ID is None:
+            logger.warning("Using ADC for GCS authentication")
+
+        if bucket is None and envs.GCS_BUCKET is None:
+            raise ValueError(
+                "Bucket name must be provided either as an argument or set in environment variables."
+            )
+
+        self.client = storage.Client(project=project_id or envs.GCP_PROJECT_ID)
+        self.bucket = self.client.bucket(bucket or envs.GCS_BUCKET)
+        logger.debug(
+            f"GCS client open, project: {self.client.project}, bucket: {self.bucket.name}"
+        )
+
+    def get_blob(self, blobpath: str) -> storage.Blob:
+        return self.bucket.blob(blobpath)
+
+    def list_blobs(self, prefix: str) -> list[storage.Blob]:
+        return self.bucket.list_blobs(prefix=prefix)
+
+    def delete_blob(self, blobpath: str | storage.Blob) -> storage.Blob:
+        blob = self.get_blob(blobpath) if isinstance(blobpath, str) else blobpath
+        return blob.delete()
+
+    def copy(
+        self,
+        src_blobpath: str,
+        dst_blobpath: str,
+        dst_bucket: str = None,
+        move: bool = False,
+    ):
+        src_bucket = self.bucket
+        src_blob = self.get_blob(src_blobpath)
+        dst_bucket = dst_bucket or src_bucket.name
+
+        self.bucket.copy_blob(src_blob, dst_bucket, dst_blobpath)
+
+        # Move mode
+        if move:
+            self.delete_blob(src_blobpath)
+            logger.debug(
+                f"Moved gs://{src_bucket}/{src_blobpath} to gs://{dst_bucket}/{dst_blobpath}"
+            )
+        # Copy mode
+        else:
+            logger.debug(
+                f"Copied gs://{src_bucket}/{src_blobpath} to gs://{dst_bucket}/{dst_blobpath}"
+            )
+
+    def upload(self, src_filepath: str, dst_blobpath: str, move: bool = False):
+        blob = self.get_blob(dst_blobpath)
+        blob.upload_from_filename(src_filepath)
+
+        # Move mode
+        if move:
+            os.remove(src_filepath)
+            logger.debug(f"Moved {src_filepath} to gs://{self.bucket.name}/{blob.name}")
+        # Copy mode
+        else:
+            logger.debug(
+                f"Uploaded {src_filepath} to gs://{self.bucket.name}/{blob.name}"
+            )
+
+    def download(
+        self, src_blobpath: str | storage.Blob, dst_filepath: str, move: bool = False
+    ):
+        blob = (
+            self.get_blob(src_blobpath)
+            if isinstance(src_blobpath, str)
+            else src_blobpath
+        )
+        blob.download_to_filename(dst_filepath)
+
+        if move:
+            self.delete_blob(blob)
+            logger.debug(f"Moved gs://{self.bucket.name}/{blob.name} to {dst_filepath}")
+        else:
+            logger.debug(
+                f"Copied gs://{self.bucket.name}/{blob.name} to {dst_filepath}"
+            )
+
+    # MARK: Utilities
 
-    def __init__(self, project: str = None, bucket_name: str = None):
-        self.project = project if project is not None else envs.GCP_PROJECT_ID
-        self.client = storage.Client(project=self.project)
-
-        bucket_name_parts = (bucket_name or envs.GCS_BUCKET).split('/')
-        self.change_bucket(bucket_name_parts[0])
-        self.base_path = '/'.join(bucket_name_parts[1:]) if len(bucket_name_parts) > 1 else None
-        not self.base_path or logger.debug(f'Base path: {self.base_path}')
-
-        logger.debug(f'GCS client open, project: {project or "<application-default>"}')
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_value, exc_tb):
-        self.close_client()
-
-    def _construct_path(self, path: str) -> str:
-        return f'{self.base_path}/{path}' if self.base_path else path
-
-    def change_bucket(self, bucket_name: str):
-        self.bucket = self.client.bucket(bucket_name)
-        logger.debug(f'Change bucket to {self.bucket.name}')
-
-    def get(self, path: str) -> storage.Blob:
-        path = self._construct_path(path)
-        return self.bucket.blob(path)
-
-    def list(self, path: str) -> list[storage.Blob]:
-        path = self._construct_path(path)
-        if '*' in path:
-            path_prefix = path.split('*')[0]
-            regex_pattern = '^' + re.escape(path).replace('\\*', '.*') + '$'
-            regex = re.compile(regex_pattern)
-            return [x for x in self.bucket.list_blobs(prefix=path_prefix) if regex.match(x.name)]
-
-        return list(self.bucket.list_blobs(prefix=path))
-
-    def copy(self, src_path: str, dst_path: str, mv: bool = False):
-        src_blob = self.get(src_path)
-        dst_blob = self.get(dst_path)
-
-        dst_blob.rewrite(src_blob)
-
-        logger.debug(f'✅ Copy gs://{src_blob.bucket.name}/{src_blob.name} to gs://{dst_blob.bucket.name}/{dst_blob.name}')
-
-        not mv or GCS.remove_blob(src_blob)
-
-        return dst_blob
-
-    def copy_to_other_gcs(self, src_blob: storage.Blob, dst_gcs: "GCS", dst_path: str, mv: bool = False):
-        self.bucket.copy_blob(src_blob, dst_gcs.bucket, dst_path)
-        dst_blob = dst_gcs.get(dst_path)
-
-        not mv or GCS.remove_blob(src_blob)
-
-        return dst_blob
-
-    def upload(self, local_path: str, remote_path: str, mv: bool = False):
-        local_path = os.path.expanduser(local_path)
-
-        if not os.path.exists(local_path):
-            raise FileNotFoundError(f'File not found: {local_path}')
-
-        blob = self.get(remote_path)
-        blob.upload_from_filename(local_path)
-
-        logger.debug(f'✅ Upload {local_path} to gs://{self.bucket.name}/{blob.name}')
-
-        not mv or os.remove(local_path)
-
-        return blob
-
-    def download(self, obj: str | storage.Blob, local_path: str, mv: bool = False):
-        local_path = os.path.expanduser(local_path)
-        is_blob = type(obj) == storage.Blob
-
-        if os.path.isdir(local_path):
-            local_path = os.path.join(local_path, obj.name.split('/')[-1] if is_blob else os.path.basename(obj))
-        if not os.path.dirname(local_path):
-            raise FileNotFoundError(f'Destination directory not found: {os.path.dirname(local_path)}')
-
-        blob = obj if is_blob else self.get(obj)
-        blob.download_to_filename(local_path)
-
-        logger.debug(f'✅ Download gs://{self.bucket.name}/{blob.name} to {local_path}')
-
-        not mv or GCS.remove_blob(blob)
-
-        return blob
-
-    def remove(self, remote_path: str):
-        blob = self.get(remote_path)
-
-        GCS.remove_blob(blob)
-
-        return blob
+    @staticmethod
+    def build_tmp_dirpath(prefix: str = "tmp") -> str:
+        """
+        Builds a temporary directory path in the GCS bucket.
+        """
+        return f"{prefix}/{get_current_datetime_str()}_{generate_random_string(alphanum=True)}"
 
-    def close_client(self):
+    def close(self):
         self.client.close()
-        logger.debug('GCS client close')
-
-    @staticmethod
-    def remove_blob(blob: storage.Blob):
-        blob.delete()
-        logger.debug(f'🗑️ Remove gs://{blob.bucket.name}/{blob.name}')
+        logger.debug("GCS client closed")
utill/my_gdrive.py ADDED
@@ -0,0 +1,196 @@
+import enum
+import logging
+import os
+
+from google.auth import default
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaFileUpload
+from googleapiclient.http import MediaIoBaseDownload
+from humanize import naturalsize
+
+
+log = logging.getLogger(__name__)
+
+
+class Role(enum.StrEnum):
+    READER = "reader"
+    WRITER = "writer"
+    COMMENTER = "commenter"
+    OWNER = "owner"
+
+
+class GDrive:
+    """
+    Custom hook for Google Drive integration in Airflow.
+    This hook can be used to interact with Google Drive APIs.
+    """
+
+    def __init__(self):
+        credentials, project = default(
+            scopes=[
+                "https://www.googleapis.com/auth/drive",
+                "https://www.googleapis.com/auth/drive.file",
+            ]
+        )
+        drive_service = build("drive", "v3", credentials=credentials)
+        self.connection = drive_service
+
+    # region Folder operations
+
+    def get_folder_by_name(self, *, parent_folder_id: str, name: str) -> str | None:
+        """
+        Retrieves a folder by its name within a specified Google Drive folder.
+        :param parent_folder_id: The ID of the parent folder to search in.
+        :param name: The name of the folder to find.
+        :return: The ID of the found folder or None if not found.
+        """
+        query = f"'{parent_folder_id}' in parents and name='{name}' and mimeType='application/vnd.google-apps.folder' and trashed=false"
+        results = (
+            self.connection.files()
+            .list(q=query, fields="files(id)", supportsAllDrives=True)
+            .execute()
+        )
+        items = results.get("files", [])
+
+        return items[0]["id"] if items else None
+
+    def create_folder(
+        self, folder_name: str, parent_folder_id: str | None = None
+    ) -> str:
+        """
+        Creates a folder in Google Drive.
+        :param folder_name: The name of the folder to create.
+        :param parent_folder_id: The ID of the parent folder (optional).
+        :return: The ID of the created folder.
+        """
+        file_metadata = {
+            "name": folder_name,
+            "mimeType": "application/vnd.google-apps.folder",
+        }
+        if parent_folder_id:
+            file_metadata["parents"] = [parent_folder_id]
+
+        file = (
+            self.connection.files()
+            .create(body=file_metadata, fields="id", supportsAllDrives=True)
+            .execute()
+        )
+        log.debug(
+            f"Folder {folder_name} created under {self.generate_gdrive_folder_url(parent_folder_id)}"
+        )
+        return file.get("id")
+
+    def grant_folder_access(
+        self,
+        folder_id: str,
+        email: str,
+        role: Role = Role.READER,
+        send_notification_email: bool = False,
+    ):
+        """
+        Grants access to a Google Drive folder to a user by email.
+        :param folder_id: The ID of the folder to grant access to.
+        :param email: The email address of the user to grant access to.
+        :param role: The role to assign (reader, writer, commenter, owner).
+        """
+        self.connection.permissions().create(
+            fileId=folder_id,
+            body={
+                "type": "user",
+                "role": role,
+                "emailAddress": email,
+            },
+            sendNotificationEmail=send_notification_email,
+            supportsAllDrives=True,
+        ).execute()
+        log.debug(
+            f"Granted {role} access to {email} for folder {self.generate_gdrive_folder_url(folder_id)}"
+        )
+
+    # endregion
+
+    # region File operations
+
+    def get_file(self, file_id: str):
+        raise NotImplementedError()
+
+    def list_files(self, folder_id: str, mime_type: str | None = None):
+        """
+        Lists files in a specified Google Drive folder.
+        :param folder_id: The ID of the folder to search in.
+        :param mime_type: Optional MIME type to filter files by.
+        :return: A list of files in the specified folder.
+        """
+        query = f"'{folder_id}' in parents and trashed=false"
+        if mime_type:
+            query += f" and mimeType='{mime_type}'"
+
+        results = (
+            self.connection.files()
+            .list(q=query, fields="files(id, name)", supportsAllDrives=True)
+            .execute()
+        )
+        return results.get("files", [])
+
+    def upload_file(
+        self, src_filepath: str, folder_id: str, mime_type: str | None = None
+    ):
+        media = MediaFileUpload(src_filepath, mimetype=mime_type, resumable=True)
+        request = self.connection.files().create(
+            body={"name": os.path.basename(src_filepath), "parents": [folder_id]},
+            media_body=media,
+            supportsAllDrives=True,
+        )
+        response = None
+        while response is None:
+            status, response = request.next_chunk()
+            if status:
+                log.debug(f"Upload progress: {int(status.progress() * 100)}%")
+
+        log.debug(
+            f"File {src_filepath} [{naturalsize(os.path.getsize(src_filepath))}] uploaded to {self.generate_gdrive_folder_url(folder_id)}"
+        )
+
+    def download_gdrive_file(self, file_id: str, dst_filepath: str):
+        request = self.connection.files().get_media(
+            fileId=file_id, supportsAllDrives=True
+        )
+
+        # Stream directly to disk
+        with open(dst_filepath, "wb") as f:
+            downloader = MediaIoBaseDownload(f, request)
+            done = False
+            while not done:
+                _, done = downloader.next_chunk()
+
+        log.debug(
+            f"GDrive file {file_id} downloaded to {dst_filepath} with size {naturalsize(os.path.getsize(dst_filepath))}"
+        )
+
+    def delete(self, file_id: str):
+        """
+        Deletes a file from Google Drive using its ID.
+        :param file_id: The ID of the file to delete.
+        """
+        self.connection.files().delete(fileId=file_id, supportsAllDrives=True).execute()
+        log.debug(f"GDrive file with ID {file_id} deleted")
+
+    # endregion
+
+    # region Other utilities
+
+    @staticmethod
+    def generate_gdrive_folder_url(folder_id: str):
+        """
+        Generate a valid GDrive folder URL
+
+        Args:
+            folder_id (str): Folder ID
+
+        Returns:
+            str: A valid GDrive folder URL
+        """
+
+        return f"https://drive.google.com/drive/folders/{folder_id}"
+
+    # endregion
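
`my_gdrive.py` is new in 0.1.x and wraps the Drive v3 API using Application Default Credentials. A minimal end-to-end sketch; the parent folder ID, email address, and file names are hypothetical, and ADC must already be configured (for example via `gcloud auth application-default login`):

    from utill.my_gdrive import GDrive, Role

    drive = GDrive()

    # Find or create a "reports" folder under a hypothetical parent folder.
    parent_id = "0AbCdEfGhIjKlM"
    folder_id = drive.get_folder_by_name(parent_folder_id=parent_id, name="reports")
    if folder_id is None:
        folder_id = drive.create_folder("reports", parent_folder_id=parent_id)

    # Share it read-only, without sending a notification email.
    drive.grant_folder_access(folder_id, "analyst@example.com", role=Role.READER)

    # Upload a file, then list, download, and delete CSVs in the folder.
    drive.upload_file("report.csv", folder_id, mime_type="text/csv")
    for f in drive.list_files(folder_id, mime_type="text/csv"):
        drive.download_gdrive_file(f["id"], f"./{f['name']}")
        drive.delete(f["id"])

Two limitations are worth knowing: the `files().list` calls read only the first page of results (there is no `pageToken` loop), and names are interpolated into query strings unescaped, so a folder or file name containing `'` will break the query.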
utill/my_input.py CHANGED
@@ -1,11 +1,15 @@
 from .my_style import italic
 
 
-def ask_yes_no(prompt: str = 'Continue?', yes_strings: tuple[str] = ('y', ), throw_if_no: bool = False) -> str:
-    prompt = f'{prompt} ({yes_strings[0]}/no) : '
-    yes = input(f'\n{italic(prompt)}') in yes_strings
+def ask_yes_no(
+    prompt: str = "Continue?",
+    yes_strings: tuple[str] = ("y",),
+    throw_if_no: bool = False,
+) -> str:
+    prompt = f"{prompt} ({yes_strings[0]}/no) : "
+    yes = input(f"\n{italic(prompt)}") in yes_strings
     if not yes:
         if throw_if_no:
-            raise Exception('Aborted by user')
+            raise Exception("Aborted by user")
 
     return yes
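
The `ask_yes_no` change is purely cosmetic (double quotes and a multi-line signature); behaviour is unchanged: any input outside `yes_strings` counts as "no", and `throw_if_no=True` raises instead of returning. A quick sketch:

    from utill.my_input import ask_yes_no

    # Returns True only for "y" or "yes"; any other input raises
    # Exception("Aborted by user") because throw_if_no=True.
    confirmed = ask_yes_no("Delete 12 blobs?", yes_strings=("y", "yes"), throw_if_no=True)

Note that the `-> str` annotation carried over from 0.0.3 is inaccurate; the function returns the `bool` result of the membership test.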
utill/my_json.py CHANGED
@@ -6,7 +6,7 @@ def _crawl_dictionary_keys(d: dict, path: tuple = ()) -> list[str]:
     paths: list[tuple] = []
 
     for key in d.keys():
-        key_path = path + (key, )
+        key_path = path + (key,)
 
         # Recursively traverse nested dictionary
         if type(d[key]) is dict:
@@ -35,11 +35,11 @@ def flatten(data: str | dict) -> list:
 
 def get_path(data: dict, path: str) -> str:
     if type(data) != dict:
-        raise ValueError('data is not a dictionary!')
+        raise ValueError("data is not a dictionary!")
 
-    items = path.split('.')
+    items = path.split(".")
     item = items[0]
-    path_remaining = '.'.join(items[1:]) if len(items) > 1 else None
+    path_remaining = ".".join(items[1:]) if len(items) > 1 else None
 
     if item not in data:
         return None
@@ -55,8 +55,8 @@ def load_jsonc_file(path) -> dict:
     Read a .jsonc (JSON with comment) files, as json.loads cannot read it
     """
 
-    with open(path, 'r') as f:
+    with open(path, "r") as f:
         content = f.read()
     pattern = r'("(?:\\.|[^"\\])*")|\/\/.*|\/\*[\s\S]*?\*\/'
-    content = re.sub(pattern, lambda m: m.group(1) if m.group(1) else '', content)
+    content = re.sub(pattern, lambda m: m.group(1) if m.group(1) else "", content)
     return json.loads(content)
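
All three `my_json.py` hunks are quote-style changes from the same reformat; behaviour is identical. For orientation, a sketch of `load_jsonc_file`, whose regex keeps string literals intact while stripping `//` and `/* */` comments (the file name and contents are illustrative):

    from utill.my_json import load_jsonc_file

    # settings.jsonc:
    #   {
    #     // line comment, stripped
    #     "url": "https://example.com/a//b"  /* block comment, stripped */
    #   }
    config = load_jsonc_file("settings.jsonc")
    assert config["url"] == "https://example.com/a//b"  # slashes inside strings survive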