PyPI - gdarch - Versions diffs - 0.1.0__py3-none-any.whl - Mend

gdarch 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

gdarch/__init__.py +1 -0
gdarch/cli.py +282 -0
gdarch-0.1.0.dist-info/LICENSE +21 -0
gdarch-0.1.0.dist-info/METADATA +140 -0
gdarch-0.1.0.dist-info/RECORD +7 -0
gdarch-0.1.0.dist-info/WHEEL +4 -0
gdarch-0.1.0.dist-info/entry_points.txt +3 -0

gdarch/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.1.0"

gdarch/cli.py ADDED Viewed

@@ -0,0 +1,282 @@
+#!/usr/bin/env python3
+"""
+CLI Tool to Archive a Google Drive Folder and Replace It with the Archive
+This script recursively downloads all files under a specified Google Drive folder,
+creates a high-compression tar.xz archive, uploads the archive to the parent folder,
+and optionally deletes the original folder.
+Note: files for which Drive reports no size (e.g. Google Docs) are skipped and are
+NOT part of the archive, so review the "Skipping file" output before relying on
+--delete-folder.
+Usage Examples:
+  # Archive and upload without deleting the original folder:
+  gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json
+  # Archive, upload, and delete the original folder:
+  gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json --delete-folder
+  # Specify a custom archive filename:
+  gdarch --folder-id <TARGET_FOLDER_ID> --archive-name my_archive.tar.xz --credentials credentials.json
+"""
+import argparse
+import io
+import lzma
+import os
+import posixpath
+import shutil
+import sys
+import tarfile
+import tempfile
+import requests
+from google.auth.transport.requests import Request
+from google.oauth2.credentials import Credentials
+from google_auth_oauthlib.flow import InstalledAppFlow
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaFileUpload
+# Google Drive scope with read and write permissions.
+# The tool lists and downloads files, uploads the archive, and can delete the
+# original folder, so a read-only scope would not be sufficient.
+SCOPES = ["https://www.googleapis.com/auth/drive"]
+def get_credentials(creds_file, token_file="token.json"):
+    """
+    Retrieve OAuth2 credentials. Uses token_file to store credentials for future runs.
+    """
+    creds = None
+    if os.path.exists(token_file):
+        creds = Credentials.from_authorized_user_file(token_file, SCOPES)
+    if not creds or not creds.valid:
+        if creds and creds.expired and creds.refresh_token:
+            try:
+                creds.refresh(Request())
+            except Exception as e:
+                print("Failed to refresh credentials:", e)
+                creds = None
+        if not creds:
+            flow = InstalledAppFlow.from_client_secrets_file(creds_file, SCOPES)
+            creds = flow.run_local_server(port=0)
+        with open(token_file, "w") as token:
+            token.write(creds.to_json())
+    return creds
+def get_drive_service(creds):
+    """Create a Google Drive API service instance."""
+    return build("drive", "v3", credentials=creds)
+def list_files(service, folder_id, parent_path=""):
+    """
+    Recursively list all files under the specified folder_id.
+    Returns a list of dictionaries with keys: id, name, mimeType, size, relative_path.
+    Files without size information (e.g. Google Docs) are skipped.
+    """
+    results = []
+    page_token = None
+    query = "'{}' in parents".format(folder_id)
+    while True:
+        response = (
+            service.files()
+            .list(
+                q=query,
+                fields="nextPageToken, files(id, name, mimeType, size)",
+                pageToken=page_token,
+                pageSize=1000,
+            )
+            .execute()
+        )
+        for f in response.get("files", []):
+            file_path = posixpath.join(parent_path, f["name"])
+            if f["mimeType"] == "application/vnd.google-apps.folder":
+                # Recursively process subfolders
+                results.extend(list_files(service, f["id"], file_path))
+            else:
+                if "size" in f:
+                    f["relative_path"] = file_path
+                    results.append(f)
+                else:
+                    print(
+                        "Skipping file (no size info):",
+                        file_path,
+                        "mimeType:",
+                        f["mimeType"],
+                    )
+        page_token = response.get("nextPageToken", None)
+        if not page_token:
+            break
+    return results
+class LimitedStream:
+    """
+    A wrapper for a stream that limits the number of bytes read.
+    This ensures that tarfile.addfile() reads the correct amount of data.
+    """
+    def __init__(self, stream, limit):
+        self.stream = stream
+        self.remaining = limit
+    def read(self, size=-1):
+        if self.remaining <= 0:
+            return b""
+        if size < 0 or size > self.remaining:
+            size = self.remaining
+        data = self.stream.read(size)
+        self.remaining -= len(data)
+        return data
+    def readable(self):
+        return True
+def create_archive(service, creds, folder_id, archive_path):
+    """
+    Download files under the specified folder_id and create a highly compressed LZMA archive at archive_path.
+    Uses maximum compression settings for best compression ratio.
+    """
+    print("Retrieving file list from the specified folder...")
+    files = list_files(service, folder_id)
+    print("Total files to archive:", len(files))
+    if not files:
+        print("No files found in the specified folder.")
+        return False
+    try:
+        # Open tar.xz with maximum compression settings
+        tar = tarfile.open(
+            archive_path,
+            mode="w:xz",
+            preset=9 | lzma.PRESET_EXTREME,
+        )
+    except Exception as e:
+        print("Failed to create archive file:", e)
+        return False
+    total_size = sum(int(f["size"]) for f in files)
+    processed_size = 0
+    for f in files:
+        rel_path = f["relative_path"]
+        file_id = f["id"]
+        try:
+            file_size = int(f["size"])
+        except Exception as e:
+            print("Invalid size info for file, skipping:", rel_path)
+            continue
+        print(
+            f"Adding to archive: {rel_path} ({file_size} bytes) - {processed_size * 100 / total_size:.1f}% complete"
+        )
+        url = "https://www.googleapis.com/drive/v3/files/{}?alt=media".format(file_id)
+        headers = {"Authorization": "Bearer " + creds.token}
+        try:
+            # Download file in streaming mode
+            response = requests.get(url, headers=headers, stream=True)
+            if response.status_code != 200:
+                print("  [ERROR] Failed to download file. HTTP status code:", response.status_code)
+                continue
+            response.raw.decode_content = True
+            limited_stream = LimitedStream(response.raw, file_size)
+            tarinfo = tarfile.TarInfo(name=rel_path)
+            tarinfo.size = file_size
+            tar.addfile(tarinfo, fileobj=limited_stream)
+            processed_size += file_size
+        except Exception as e:
+            print("  [ERROR] Error while adding file to archive:", e)
+            continue
+    tar.close()
+    return True
+def upload_file(service, local_file, name, parent_id):
+    """
+    Upload the local file to Google Drive under the specified parent folder.
+    """
+    file_metadata = {"name": name, "parents": [parent_id]}
+    media = MediaFileUpload(local_file, mimetype="application/x-xz", resumable=True)
+    file = service.files().create(body=file_metadata, media_body=media, fields="id").execute()
+    return file.get("id")
+def delete_file_or_folder(service, file_id):
+    """Delete the specified file or folder from Google Drive."""
+    try:
+        service.files().delete(fileId=file_id).execute()
+        print("Successfully deleted. ID:", file_id)
+    except Exception as e:
+        print("Error deleting file/folder:", e)
+def get_file_metadata(service, file_id):
+    """Retrieve metadata (id, name, parents) for the specified file."""
+    return service.files().get(fileId=file_id, fields="id,name,parents").execute()
+def main():
+    parser = argparse.ArgumentParser(
+        description="Archive a specified Google Drive folder and replace it with the archive."
+    )
+    parser.add_argument("--folder-id", required=True, help="Google Drive ID of the target folder")
+    parser.add_argument(
+        "--credentials",
+        default="credentials.json",
+        help="OAuth2 credentials file (e.g., credentials.json)",
+    )
+    parser.add_argument(
+        "--archive-name",
+        help="Name for the uploaded archive file (e.g., folder_archive.tar.xz). "
+        "Defaults to folder name + '.tar.xz'",
+    )
+    parser.add_argument(
+        "--delete-folder",
+        action="store_true",
+        help="Delete the original folder after archiving",
+    )
+    args = parser.parse_args()
+    # Initialize credentials and Drive API service
+    creds = get_credentials(args.credentials)
+    service = get_drive_service(creds)
+    # Retrieve metadata for the target folder (name, parent folder, etc.)
+    folder_meta = get_file_metadata(service, args.folder_id)
+    folder_name = folder_meta.get("name", "folder")
+    parent_ids = folder_meta.get("parents", [])
+    if not parent_ids:
+        print("No parent folder found. Cannot process root-level folders.")
+        sys.exit(1)
+    parent_id = parent_ids[0]
+    archive_name = args.archive_name if args.archive_name else f"{folder_name}.tar.xz"
+    print("Archive file name:", archive_name)
+    # Create archive in a temporary directory
+    temp_dir = tempfile.mkdtemp()
+    archive_path = os.path.join(temp_dir, archive_name)
+    print("Creating archive at temporary location:", archive_path)
+    if not create_archive(service, creds, args.folder_id, archive_path):
+        print("Failed to create archive.")
+        shutil.rmtree(temp_dir)
+        sys.exit(1)
+    print("Archive created successfully. Starting upload...")
+    archive_file_id = upload_file(service, archive_path, archive_name, parent_id)
+    print("Upload complete. Archive file ID:", archive_file_id)
+    if args.delete_folder:
+        print("Deleting original folder as specified...")
+        delete_file_or_folder(service, args.folder_id)
+    else:
+        print("Original folder retained (option --delete-folder not specified).")
+    # Clean up temporary directory
+    shutil.rmtree(temp_dir)
+    print("Operation completed successfully. Enjoy your productive day!")
+if __name__ == "__main__":
+    main()

gdarch-0.1.0.dist-info/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 Taro Furuya
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

gdarch-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,140 @@
+Metadata-Version: 2.1
+Name: gdarch
+Version: 0.1.0
+Summary: CLI tool to archive a Google Drive folder and replace it with the archive. Helps to free up Google Drive storage space by compressing rarely accessed folders while keeping them accessible.
+Home-page: https://github.com/taross-f/gdarch
+License: MIT
+Keywords: google-drive,archive,compression,backup,storage-management
+Author: Taro Furuya
+Author-email: taro.furuya@gmail.com
+Requires-Python: >=3.8,<4.0
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: End Users/Desktop
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: System :: Archiving :: Compression
+Classifier: Topic :: Utilities
+Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0)
+Requires-Dist: google-auth (>=2.0.0,<3.0.0)
+Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0)
+Requires-Dist: requests (>=2.25.0,<3.0.0)
+Project-URL: Repository, https://github.com/taross-f/gdarch
+Description-Content-Type: text/markdown
+# gdarch
+[![CI](https://github.com/taross-f/gdarch/actions/workflows/ci.yml/badge.svg)](https://github.com/taross-f/gdarch/actions/workflows/ci.yml)
+[![codecov](https://codecov.io/gh/taross-f/gdarch/branch/main/graph/badge.svg)](https://codecov.io/gh/taross-f/gdarch)
+A CLI tool to archive Google Drive folders and replace them with compressed archives.
+## Motivation
+Google Drive storage space is often filled with large folders that are rarely accessed but need to be kept for reference or backup purposes. This tool helps you free up storage space by:
+1. Automatically compressing such folders into high-compression archives
+2. Replacing the original folders with their compressed versions
+3. Maintaining the same folder structure and accessibility
+This way, you can keep your important data while significantly reducing storage usage.
+## Features
+- Recursively downloads all files from a specified Google Drive folder
+- Creates a high-compression tar.xz archive
+- Uploads the archive to the parent folder
+- Optionally deletes the original folder
+## Installation
+### From PyPI
+```bash
+pip install gdarch
+```
+### From Source
+```bash
+# Install Poetry (if not already installed)
+curl -sSL https://install.python-poetry.org | python3 -
+# Clone and install
+git clone https://github.com/taross-f/gdarch.git
+cd gdarch
+poetry install
+```
+## Usage
+1. Get OAuth2 credentials from Google Cloud Console:
+   - Visit [Google Cloud Console](https://console.cloud.google.com/)
+   - Create or select a project
+   - Go to APIs & Services > Credentials
+   - Create an OAuth 2.0 Client ID
+   - Download the credentials and save as `credentials.json`
+2. Run the command:
+```bash
+# When installed from PyPI
+gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json
+# When installed from source (using Poetry)
+poetry run gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json
+# Archive and delete the original folder
+gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json --delete-folder
+# Specify a custom archive name
+gdarch --folder-id <TARGET_FOLDER_ID> --archive-name my_archive.tar.xz --credentials credentials.json
+```
+### Options
+- `--folder-id`: Google Drive folder ID to archive (required)
+- `--credentials`: Path to OAuth2 credentials file (defaults to credentials.json)
+- `--archive-name`: Name for the uploaded archive file (optional)
+- `--delete-folder`: Delete the original folder after archiving (flag)
+### Finding Folder ID
+The folder ID is the last part of the Google Drive folder URL:
+```
+https://drive.google.com/drive/folders/1234567890abcdef
+                                      ^^^^^^^^^^^^^^^^
+                                      This is your folder ID
+```
+## Development
+```bash
+# Install dependencies
+poetry install
+# Run tests
+poetry run pytest
+# Format code
+poetry run black .
+poetry run isort .
+```
+## How It Works
+1. Authenticates with Google Drive using OAuth2
+2. Recursively lists all files in the specified folder
+3. Downloads files while streaming them directly into a tar.xz archive
+4. Uploads the compressed archive to the parent folder
+5. Optionally deletes the original folder
+6. Cleans up temporary files
+## License
+MIT License

gdarch-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+gdarch/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
+gdarch/cli.py,sha256=NgSNG3aUpt0PInn5VfqPCJJkh98S7VpvJuVkTGS0IKM,9777
+gdarch-0.1.0.dist-info/LICENSE,sha256=CBYYPBEwlLLNljBvM5zvVymajD7Cd-DLpPyN_f0d8Lk,1068
+gdarch-0.1.0.dist-info/METADATA,sha256=PmvCiiXyxnkoR8JKwE0hIi6A-zgz_gvDyYGOXu7nQBI,4593
+gdarch-0.1.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+gdarch-0.1.0.dist-info/entry_points.txt,sha256=duJYwjiEX8HSYLwjDLpJ4D1QF2y8XtE7JX6Bvy9Qu4Y,42
+gdarch-0.1.0.dist-info/RECORD,,

gdarch-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: poetry-core 1.8.1
+Root-Is-Purelib: true
+Tag: py3-none-any

gdarch-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,3 @@
+[console_scripts]
+gdarch=gdarch.cli:main