rdxz2-utill 0.0.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
utill/my_gcs.py CHANGED
@@ -1,117 +1,105 @@
 import os
-import re
 
 from google.cloud import storage
 from loguru import logger
 
+from .my_datetime import get_current_datetime_str
 from .my_env import envs
+from .my_string import generate_random_string
 
 
 class GCS:
+    def __init__(self, bucket: str | None = None, project_id: str | None = None):
+        if project_id is None and envs.GCP_PROJECT_ID is None:
+            logger.warning("Using ADC for GCS authentication")
+
+        if bucket is None and envs.GCS_BUCKET is None:
+            raise ValueError(
+                "Bucket name must be provided either as an argument or set in environment variables."
+            )
+
+        self.client = storage.Client(project=project_id or envs.GCP_PROJECT_ID)
+        self.bucket = self.client.bucket(bucket or envs.GCS_BUCKET)
+        logger.debug(
+            f"GCS client open, project: {self.client.project}, bucket: {self.bucket.name}"
+        )
+
+    def get_blob(self, blobpath: str) -> storage.Blob:
+        return self.bucket.blob(blobpath)
+
+    def list_blobs(self, prefix: str) -> list[storage.Blob]:
+        return self.bucket.list_blobs(prefix=prefix)
+
+    def delete_blob(self, blobpath: str | storage.Blob) -> storage.Blob:
+        blob = self.get_blob(blobpath) if isinstance(blobpath, str) else blobpath
+        return blob.delete()
+
+    def copy(
+        self,
+        src_blobpath: str,
+        dst_blobpath: str,
+        dst_bucket: str = None,
+        move: bool = False,
+    ):
+        src_bucket = self.bucket
+        src_blob = self.get_blob(src_blobpath)
+        dst_bucket = dst_bucket or src_bucket.name
+
+        self.bucket.copy_blob(src_blob, dst_bucket, dst_blobpath)
+
+        # Move mode
+        if move:
+            self.delete_blob(src_blobpath)
+            logger.debug(
+                f"Moved gs://{src_bucket}/{src_blobpath} to gs://{dst_bucket}/{dst_blobpath}"
+            )
+        # Copy mode
+        else:
+            logger.debug(
+                f"Copied gs://{src_bucket}/{src_blobpath} to gs://{dst_bucket}/{dst_blobpath}"
+            )
+
+    def upload(self, src_filepath: str, dst_blobpath: str, move: bool = False):
+        blob = self.get_blob(dst_blobpath)
+        blob.upload_from_filename(src_filepath)
+
+        # Move mode
+        if move:
+            os.remove(src_filepath)
+            logger.debug(f"Moved {src_filepath} to gs://{self.bucket.name}/{blob.name}")
+        # Copy mode
+        else:
+            logger.debug(
+                f"Uploaded {src_filepath} to gs://{self.bucket.name}/{blob.name}"
+            )
+
+    def download(
+        self, src_blobpath: str | storage.Blob, dst_filepath: str, move: bool = False
+    ):
+        blob = (
+            self.get_blob(src_blobpath)
+            if isinstance(src_blobpath, str)
+            else src_blobpath
+        )
+        blob.download_to_filename(dst_filepath)
+
+        if move:
+            self.delete_blob(blob)
+            logger.debug(f"Moved gs://{self.bucket.name}/{blob.name} to {dst_filepath}")
+        else:
+            logger.debug(
+                f"Copied gs://{self.bucket.name}/{blob.name} to {dst_filepath}"
+            )
+
+    # MARK: Utilities
 
-    def __init__(self, project: str = None, bucket_name: str = None):
-        self.project = project if project is not None else envs.GCP_PROJECT_ID
-        self.client = storage.Client(project=self.project)
-
-        bucket_name_parts = (bucket_name or envs.GCS_BUCKET).split('/')
-        self.change_bucket(bucket_name_parts[0])
-        self.base_path = '/'.join(bucket_name_parts[1:]) if len(bucket_name_parts) > 1 else None
-        not self.base_path or logger.debug(f'Base path: {self.base_path}')
-
-        logger.debug(f'GCS client open, project: {project or "<application-default>"}')
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_value, exc_tb):
-        self.close_client()
-
-    def _construct_path(self, path: str) -> str:
-        return f'{self.base_path}/{path}' if self.base_path else path
-
-    def change_bucket(self, bucket_name: str):
-        self.bucket = self.client.bucket(bucket_name)
-        logger.debug(f'Change bucket to {self.bucket.name}')
-
-    def get(self, path: str) -> storage.Blob:
-        path = self._construct_path(path)
-        return self.bucket.blob(path)
-
-    def list(self, path: str) -> list[storage.Blob]:
-        path = self._construct_path(path)
-        if '*' in path:
-            path_prefix = path.split('*')[0]
-            regex_pattern = '^' + re.escape(path).replace('\\*', '.*') + '$'
-            regex = re.compile(regex_pattern)
-            return [x for x in self.bucket.list_blobs(prefix=path_prefix) if regex.match(x.name)]
-
-        return list(self.bucket.list_blobs(prefix=path))
-
-    def copy(self, src_path: str, dst_path: str, mv: bool = False):
-        src_blob = self.get(src_path)
-        dst_blob = self.get(dst_path)
-
-        dst_blob.rewrite(src_blob)
-
-        logger.debug(f'✅ Copy gs://{src_blob.bucket.name}/{src_blob.name} to gs://{dst_blob.bucket.name}/{dst_blob.name}')
-
-        not mv or GCS.remove_blob(src_blob)
-
-        return dst_blob
-
-    def copy_to_other_gcs(self, src_blob: storage.Blob, dst_gcs: "GCS", dst_path: str, mv: bool = False):
-        self.bucket.copy_blob(src_blob, dst_gcs.bucket, dst_path)
-        dst_blob = dst_gcs.get(dst_path)
-
-        not mv or GCS.remove_blob(src_blob)
-
-        return dst_blob
-
-    def upload(self, local_path: str, remote_path: str, mv: bool = False):
-        local_path = os.path.expanduser(local_path)
-
-        if not os.path.exists(local_path):
-            raise FileNotFoundError(f'File not found: {local_path}')
-
-        blob = self.get(remote_path)
-        blob.upload_from_filename(local_path)
-
-        logger.debug(f'✅ Upload {local_path} to gs://{self.bucket.name}/{blob.name}')
-
-        not mv or os.remove(local_path)
-
-        return blob
-
-    def download(self, obj: str | storage.Blob, local_path: str, mv: bool = False):
-        local_path = os.path.expanduser(local_path)
-        is_blob = type(obj) == storage.Blob
-
-        if os.path.isdir(local_path):
-            local_path = os.path.join(local_path, obj.name.split('/')[-1] if is_blob else os.path.basename(obj))
-        if not os.path.dirname(local_path):
-            raise FileNotFoundError(f'Destination directory not found: {os.path.dirname(local_path)}')
-
-        blob = obj if is_blob else self.get(obj)
-        blob.download_to_filename(local_path)
-
-        logger.debug(f'✅ Download gs://{self.bucket.name}/{blob.name} to {local_path}')
-
-        not mv or GCS.remove_blob(blob)
-
-        return blob
-
-    def remove(self, remote_path: str):
-        blob = self.get(remote_path)
-
-        GCS.remove_blob(blob)
-
-        return blob
+    @staticmethod
+    def build_tmp_dirpath(prefix: str = "tmp") -> str:
+        """
+        Builds a temporary directory path in the GCS bucket.
+        """
+        return f"{prefix}/{get_current_datetime_str()}_{generate_random_string(alphanum=True)}"
 
-    def close_client(self):
+    def close(self):
         self.client.close()
-        logger.debug('GCS client close')
-
-    @staticmethod
-    def remove_blob(blob: storage.Blob):
-        blob.delete()
-        logger.debug(f'🗑️ Remove gs://{blob.bucket.name}/{blob.name}')
+        logger.debug("GCS client closed")
utill/my_gdrive.py ADDED
@@ -0,0 +1,196 @@
+import enum
+import logging
+import os
+
+from google.auth import default
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaFileUpload
+from googleapiclient.http import MediaIoBaseDownload
+from humanize import naturalsize
+
+
+log = logging.getLogger(__name__)
+
+
+class Role(enum.StrEnum):
+    READER = "reader"
+    WRITER = "writer"
+    COMMENTER = "commenter"
+    OWNER = "owner"
+
+
+class GDrive:
+    """
+    Custom hook for Google Drive integration in Airflow.
+    This hook can be used to interact with Google Drive APIs.
+    """
+
+    def __init__(self):
+        credentials, project = default(
+            scopes=[
+                "https://www.googleapis.com/auth/drive",
+                "https://www.googleapis.com/auth/drive.file",
+            ]
+        )
+        drive_service = build("drive", "v3", credentials=credentials)
+        self.connection = drive_service
+
+    # region Folder operations
+
+    def get_folder_by_name(self, *, parent_folder_id: str, name: str) -> str | None:
+        """
+        Retrieves a folder by its name within a specified Google Drive folder.
+        :param parent_folder_id: The ID of the parent folder to search in.
+        :param name: The name of the folder to find.
+        :return: The ID of the found folder or None if not found.
+        """
+        query = f"'{parent_folder_id}' in parents and name='{name}' and mimeType='application/vnd.google-apps.folder' and trashed=false"
+        results = (
+            self.connection.files()
+            .list(q=query, fields="files(id)", supportsAllDrives=True)
+            .execute()
+        )
+        items = results.get("files", [])
+
+        return items[0]["id"] if items else None
+
+    def create_folder(
+        self, folder_name: str, parent_folder_id: str | None = None
+    ) -> str:
+        """
+        Creates a folder in Google Drive.
+        :param folder_name: The name of the folder to create.
+        :param parent_folder_id: The ID of the parent folder (optional).
+        :return: The ID of the created folder.
+        """
+        file_metadata = {
+            "name": folder_name,
+            "mimeType": "application/vnd.google-apps.folder",
+        }
+        if parent_folder_id:
+            file_metadata["parents"] = [parent_folder_id]
+
+        file = (
+            self.connection.files()
+            .create(body=file_metadata, fields="id", supportsAllDrives=True)
+            .execute()
+        )
+        log.debug(
+            f"Folder {folder_name} created under {self.generate_gdrive_folder_url(parent_folder_id)}"
+        )
+        return file.get("id")
+
+    def grant_folder_access(
+        self,
+        folder_id: str,
+        email: str,
+        role: Role = Role.READER,
+        send_notification_email: bool = False,
+    ):
+        """
+        Grants access to a Google Drive folder to a user by email.
+        :param folder_id: The ID of the folder to grant access to.
+        :param email: The email address of the user to grant access to.
+        :param role: The role to assign (reader, writer, commenter, owner).
+        """
+        self.connection.permissions().create(
+            fileId=folder_id,
+            body={
+                "type": "user",
+                "role": role,
+                "emailAddress": email,
+            },
+            sendNotificationEmail=send_notification_email,
+            supportsAllDrives=True,
+        ).execute()
+        log.debug(
+            f"Granted {role} access to {email} for folder {self.generate_gdrive_folder_url(folder_id)}"
+        )
+
+    # endregion
+
+    # region File operations
+
+    def get_file(self, file_id: str):
+        raise NotImplementedError()
+
+    def list_files(self, folder_id: str, mime_type: str | None = None):
+        """
+        Lists files in a specified Google Drive folder.
+        :param folder_id: The ID of the folder to search in.
+        :param mime_type: Optional MIME type to filter files by.
+        :return: A list of files in the specified folder.
+        """
+        query = f"'{folder_id}' in parents and trashed=false"
+        if mime_type:
+            query += f" and mimeType='{mime_type}'"
+
+        results = (
+            self.connection.files()
+            .list(q=query, fields="files(id, name)", supportsAllDrives=True)
+            .execute()
+        )
+        return results.get("files", [])
+
+    def upload_file(
+        self, src_filepath: str, folder_id: str, mime_type: str | None = None
+    ):
+        media = MediaFileUpload(src_filepath, mimetype=mime_type, resumable=True)
+        request = self.connection.files().create(
+            body={"name": os.path.basename(src_filepath), "parents": [folder_id]},
+            media_body=media,
+            supportsAllDrives=True,
+        )
+        response = None
+        while response is None:
+            status, response = request.next_chunk()
+            if status:
+                log.debug(f"Upload progress: {int(status.progress() * 100)}%")
+
+        log.debug(
+            f"File {src_filepath} [{naturalsize(os.path.getsize(src_filepath))}] uploaded to {self.generate_gdrive_folder_url(folder_id)}"
+        )
+
+    def download_gdrive_file(self, file_id: str, dst_filepath: str):
+        request = self.connection.files().get_media(
+            fileId=file_id, supportsAllDrives=True
+        )
+
+        # Stream directly to disk
+        with open(dst_filepath, "wb") as f:
+            downloader = MediaIoBaseDownload(f, request)
+            done = False
+            while not done:
+                _, done = downloader.next_chunk()
+
+        log.debug(
+            f"GDrive file {file_id} downloaded to {dst_filepath} with size {naturalsize(os.path.getsize(dst_filepath))}"
+        )
+
+    def delete(self, file_id: str):
+        """
+        Deletes a file from Google Drive using its ID.
+        :param file_id: The ID of the file to delete.
+        """
+        self.connection.files().delete(fileId=file_id, supportsAllDrives=True).execute()
+        log.debug(f"GDrive file with ID {file_id} deleted")
+
+    # endregion
+
+    # region Other utilities
+
+    @staticmethod
+    def generate_gdrive_folder_url(folder_id: str):
+        """
+        Generate a valid GDrive folder URL
+
+        Args:
+            folder_id (str): Folder ID
+
+        Returns:
+            str: A valid GDrive folder URL
+        """
+
+        return f"https://drive.google.com/drive/folders/{folder_id}"
+
+    # endregion
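
`my_gdrive.py` is new in 0.1.x and wraps the Drive v3 API using Application Default Credentials. A minimal end-to-end sketch; the parent folder ID, email address, and file names are hypothetical, and ADC must already be configured (for example via `gcloud auth application-default login`):

    from utill.my_gdrive import GDrive, Role

    drive = GDrive()

    # Find or create a "reports" folder under a hypothetical parent folder.
    parent_id = "0AbCdEfGhIjKlM"
    folder_id = drive.get_folder_by_name(parent_folder_id=parent_id, name="reports")
    if folder_id is None:
        folder_id = drive.create_folder("reports", parent_folder_id=parent_id)

    # Share it read-only, without sending a notification email.
    drive.grant_folder_access(folder_id, "analyst@example.com", role=Role.READER)

    # Upload a file, then list, download, and delete CSVs in the folder.
    drive.upload_file("report.csv", folder_id, mime_type="text/csv")
    for f in drive.list_files(folder_id, mime_type="text/csv"):
        drive.download_gdrive_file(f["id"], f"./{f['name']}")
        drive.delete(f["id"])

Two limitations are worth knowing: the `files().list` calls read only the first page of results (there is no `pageToken` loop), and names are interpolated into query strings unescaped, so a folder or file name containing `'` will break the query.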
utill/my_input.py CHANGED
@@ -1,11 +1,15 @@
 from .my_style import italic
 
 
-def ask_yes_no(prompt: str = 'Continue?', yes_strings: tuple[str] = ('y', ), throw_if_no: bool = False) -> str:
-    prompt = f'{prompt} ({yes_strings[0]}/no) : '
-    yes = input(f'\n{italic(prompt)}') in yes_strings
+def ask_yes_no(
+    prompt: str = "Continue?",
+    yes_strings: tuple[str] = ("y",),
+    throw_if_no: bool = False,
+) -> str:
+    prompt = f"{prompt} ({yes_strings[0]}/no) : "
+    yes = input(f"\n{italic(prompt)}") in yes_strings
     if not yes:
         if throw_if_no:
-            raise Exception('Aborted by user')
+            raise Exception("Aborted by user")
 
     return yes
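
The `ask_yes_no` change is purely cosmetic (double quotes and a multi-line signature); behaviour is unchanged: any input outside `yes_strings` counts as "no", and `throw_if_no=True` raises instead of returning. A quick sketch:

    from utill.my_input import ask_yes_no

    # Returns True only for "y" or "yes"; any other input raises
    # Exception("Aborted by user") because throw_if_no=True.
    confirmed = ask_yes_no("Delete 12 blobs?", yes_strings=("y", "yes"), throw_if_no=True)

Note that the `-> str` annotation carried over from 0.0.3 is inaccurate; the function returns the `bool` result of the membership test.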
utill/my_json.py CHANGED
@@ -6,7 +6,7 @@ def _crawl_dictionary_keys(d: dict, path: tuple = ()) -> list[str]:
     paths: list[tuple] = []
 
     for key in d.keys():
-        key_path = path + (key, )
+        key_path = path + (key,)
 
         # Recursively traverse nested dictionary
         if type(d[key]) is dict:
@@ -35,11 +35,11 @@ def flatten(data: str | dict) -> list:
 
 def get_path(data: dict, path: str) -> str:
     if type(data) != dict:
-        raise ValueError('data is not a dictionary!')
+        raise ValueError("data is not a dictionary!")
 
-    items = path.split('.')
+    items = path.split(".")
     item = items[0]
-    path_remaining = '.'.join(items[1:]) if len(items) > 1 else None
+    path_remaining = ".".join(items[1:]) if len(items) > 1 else None
 
     if item not in data:
         return None
@@ -55,8 +55,8 @@ def load_jsonc_file(path) -> dict:
     Read a .jsonc (JSON with comment) files, as json.loads cannot read it
     """
 
-    with open(path, 'r') as f:
+    with open(path, "r") as f:
         content = f.read()
     pattern = r'("(?:\\.|[^"\\])*")|\/\/.*|\/\*[\s\S]*?\*\/'
-    content = re.sub(pattern, lambda m: m.group(1) if m.group(1) else '', content)
+    content = re.sub(pattern, lambda m: m.group(1) if m.group(1) else "", content)
     return json.loads(content)
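
All three `my_json.py` hunks are quote-style changes from the same reformat; behaviour is identical. For orientation, a sketch of `load_jsonc_file`, whose regex keeps string literals intact while stripping `//` and `/* */` comments (the file name and contents are illustrative):

    from utill.my_json import load_jsonc_file

    # settings.jsonc:
    #   {
    #     // line comment, stripped
    #     "url": "https://example.com/a//b"  /* block comment, stripped */
    #   }
    config = load_jsonc_file("settings.jsonc")
    assert config["url"] == "https://example.com/a//b"  # slashes inside strings survive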