gdarch 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gdarch/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
gdarch/cli.py ADDED
@@ -0,0 +1,282 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CLI Tool to Archive a Google Drive Folder and Replace It with the Archive
4
+
5
+ This script recursively downloads all files under a specified Google Drive folder,
6
+ creates a high-compression tar.xz archive, uploads the archive to the parent folder,
7
+ and optionally deletes the original folder.
8
+
9
+ Usage Examples:
10
+ # Archive and upload without deleting the original folder:
11
+ gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json
12
+
13
+ # Archive, upload, and delete the original folder:
14
+ gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json --delete-folder
15
+
16
+ # Specify a custom archive filename:
17
+ gdarch --folder-id <TARGET_FOLDER_ID> --archive-name my_archive.tar.xz --credentials credentials.json
18
+ """
19
+
20
+ import argparse
21
+ import io
22
+ import lzma
23
+ import os
24
+ import posixpath
25
+ import shutil
26
+ import sys
27
+ import tarfile
28
+ import tempfile
29
+
30
+ import requests
31
+ from google.auth.transport.requests import Request
32
+ from google.oauth2.credentials import Credentials
33
+ from google_auth_oauthlib.flow import InstalledAppFlow
34
+ from googleapiclient.discovery import build
35
+ from googleapiclient.http import MediaFileUpload
36
+
37
+ # Google Drive scope with read and write permissions
38
+ SCOPES = ["https://www.googleapis.com/auth/drive"]
39
+
40
+
41
def get_credentials(creds_file, token_file="token.json"):
    """
    Retrieve OAuth2 user credentials for the Google Drive API.

    Cached credentials are loaded from token_file when it exists. Expired
    credentials that carry a refresh token are refreshed. If no valid
    credentials result, the interactive installed-app flow is run with the
    client secrets in creds_file. Refreshed or newly obtained credentials are
    written back to token_file for future runs.

    Args:
        creds_file: Path to the OAuth2 client secrets JSON file.
        token_file: Path of the JSON file that caches the authorized user
            credentials between runs.

    Returns:
        The credentials object (valid at the time of return).
    """
    creds = None
    if os.path.exists(token_file):
        try:
            creds = Credentials.from_authorized_user_file(token_file, SCOPES)
        except ValueError as e:
            # A truncated or hand-edited token file should trigger a fresh
            # login instead of aborting the whole run.
            print("Ignoring unreadable token file:", e)
            creds = None

    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        try:
            creds.refresh(Request())
        except Exception as e:
            print("Failed to refresh credentials:", e)
            creds = None

    # Credentials can still be unusable here even though the object exists
    # (e.g. no refresh token was stored), so re-check validity rather than
    # only testing for None before falling back to the interactive flow.
    if not creds or not creds.valid:
        flow = InstalledAppFlow.from_client_secrets_file(creds_file, SCOPES)
        creds = flow.run_local_server(port=0)

    # The token file holds long-lived secrets: create it owner-readable only.
    # (The mode applies when the file is created; an existing file keeps its
    # current permissions.)
    fd = os.open(token_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as token:
        token.write(creds.to_json())
    return creds
61
+
62
+
63
def get_drive_service(creds):
    """Build and return a Drive v3 API client authorized with the given credentials."""
    drive_service = build("drive", "v3", credentials=creds)
    return drive_service
66
+
67
+
68
def list_files(service, folder_id, parent_path=""):
    """
    Recursively list all non-trashed files under the specified folder_id.

    Args:
        service: Drive API service object; only ``files().list(...).execute()``
            is used.
        folder_id: Drive ID of the folder whose children are listed.
        parent_path: POSIX-style path prefix of this folder inside the
            archive ("" for the top-level folder).

    Returns:
        A list of dictionaries with keys: id, name, mimeType, size,
        relative_path. Files without size information (e.g. Google Docs) are
        skipped with a message, because they cannot be streamed into a tar
        member of known length.
    """
    results = []
    page_token = None
    # Without the trashed filter, files.list also returns items sitting in
    # the trash, which would silently end up in the archive.
    query = "'{}' in parents and trashed = false".format(folder_id)
    while True:
        response = (
            service.files()
            .list(
                q=query,
                fields="nextPageToken, files(id, name, mimeType, size)",
                pageToken=page_token,
                pageSize=1000,
            )
            .execute()
        )

        for f in response.get("files", []):
            # Drive names may contain "/". Left as-is, a leading slash makes
            # posixpath.join() discard parent_path, and an inner slash would
            # invent directories that do not exist in Drive.
            safe_name = f["name"].replace("/", "_")
            file_path = posixpath.join(parent_path, safe_name)
            if f["mimeType"] == "application/vnd.google-apps.folder":
                # Recursively process subfolders
                results.extend(list_files(service, f["id"], file_path))
            elif "size" in f:
                f["relative_path"] = file_path
                results.append(f)
            else:
                print(
                    "Skipping file (no size info):",
                    file_path,
                    "mimeType:",
                    f["mimeType"],
                )

        page_token = response.get("nextPageToken", None)
        if not page_token:
            break
    return results
109
+
110
+
111
class LimitedStream:
    """
    A read-only wrapper that exposes at most ``limit`` bytes of a stream.

    This ensures that tarfile.addfile() never consumes more data than the
    size declared in the tar header, even if the underlying stream has more.
    """

    def __init__(self, stream, limit):
        # stream: any object with a read(size) method returning bytes.
        # limit: maximum total number of bytes this wrapper will hand out.
        self.stream = stream
        self.remaining = limit

    def read(self, size=-1):
        """
        Read up to ``size`` bytes, never exceeding the remaining budget.

        A negative or None size means "everything still allowed". Returns
        b"" once the limit has been reached.
        """
        if self.remaining <= 0:
            return b""
        # None is a legal "read all" argument for file-like objects; treat it
        # like a negative size instead of failing on the comparison.
        if size is None or size < 0 or size > self.remaining:
            size = self.remaining
        data = self.stream.read(size)
        # Count what was actually delivered; the source may return fewer bytes.
        self.remaining -= len(data)
        return data

    def readable(self):
        """Report that the wrapper supports reading."""
        return True
132
+
133
+
134
def create_archive(service, creds, folder_id, archive_path):
    """
    Download files under the specified folder_id and create a highly
    compressed tar.xz archive at archive_path.

    Each file is streamed from the Drive v3 ``alt=media`` endpoint straight
    into the archive, using the maximum LZMA preset (9 | PRESET_EXTREME).

    Args:
        service: Drive API service object, used to list the folder contents.
        creds: OAuth2 credentials; their token authorizes the downloads and is
            refreshed when it is no longer valid.
        folder_id: Drive ID of the folder to archive.
        archive_path: Local path of the archive file to create.

    Returns:
        True only if every listed file was added to the archive. False if the
        folder is empty, the archive cannot be created, or any file could not
        be archived. The caller may delete the original folder afterwards, so
        an incomplete archive must never be reported as success.
    """
    print("Retrieving file list from the specified folder...")
    files = list_files(service, folder_id)
    print("Total files to archive:", len(files))
    if not files:
        print("No files found in the specified folder.")
        return False

    # Validate sizes up front so one bad entry cannot crash the size total.
    entries = []
    failed = []
    for f in files:
        rel_path = f["relative_path"]
        try:
            entries.append((f["id"], rel_path, int(f["size"])))
        except (KeyError, TypeError, ValueError):
            print("Invalid size info for file, skipping:", rel_path)
            failed.append(rel_path)

    total_size = sum(size for _, _, size in entries)
    processed_size = 0
    # (connect, read) timeouts in seconds. The read timeout bounds inactivity
    # between chunks, not the total transfer time of a large file.
    timeout = (10, 120)

    try:
        # Open tar.xz with maximum compression settings
        tar = tarfile.open(
            archive_path,
            mode="w:xz",
            preset=9 | lzma.PRESET_EXTREME,
        )
    except Exception as e:
        print("Failed to create archive file:", e)
        return False

    # The context manager closes the archive on every exit path.
    with tar:
        for file_id, rel_path, file_size in entries:
            # Guard against a folder that contains only empty files.
            percent = processed_size * 100 / total_size if total_size else 100.0
            print(
                f"Adding to archive: {rel_path} ({file_size} bytes) - {percent:.1f}% complete"
            )
            url = "https://www.googleapis.com/drive/v3/files/{}?alt=media".format(file_id)
            try:
                # Compression at this preset is slow; the access token can
                # expire mid-run, so renew it before building the header.
                if not creds.valid:
                    creds.refresh(Request())
                headers = {"Authorization": "Bearer " + creds.token}
                # Download file in streaming mode
                response = requests.get(url, headers=headers, stream=True, timeout=timeout)
            except Exception as e:
                # Nothing has been written for this member yet, so the archive
                # is still consistent and the remaining files can be tried.
                print(" [ERROR] Error while adding file to archive:", e)
                failed.append(rel_path)
                continue

            with response:  # release the connection when done with this file
                if response.status_code != 200:
                    print(" [ERROR] Failed to download file. HTTP status code:", response.status_code)
                    failed.append(rel_path)
                    continue
                response.raw.decode_content = True
                limited_stream = LimitedStream(response.raw, file_size)
                tarinfo = tarfile.TarInfo(name=rel_path)
                tarinfo.size = file_size
                try:
                    tar.addfile(tarinfo, fileobj=limited_stream)
                except Exception as e:
                    # The member header (and possibly part of the data) is
                    # already in the archive, so the tar stream is no longer
                    # trustworthy; stop instead of appending to it.
                    print(" [ERROR] Error while adding file to archive:", e)
                    print("Archive is incomplete; aborting at:", rel_path)
                    return False
            processed_size += file_size

    if failed:
        print("Files that could not be archived:", len(failed))
        for rel_path in failed:
            print("  -", rel_path)
        return False
    return True
192
+
193
+
194
def upload_file(service, local_file, name, parent_id):
    """
    Upload local_file to Google Drive as ``name`` inside the folder parent_id.

    The file is sent as a resumable upload with the application/x-xz MIME
    type. Returns the Drive ID of the newly created file.
    """
    payload = MediaFileUpload(local_file, mimetype="application/x-xz", resumable=True)
    create_request = service.files().create(
        body={"name": name, "parents": [parent_id]},
        media_body=payload,
        fields="id",
    )
    created = create_request.execute()
    return created.get("id")
202
+
203
+
204
def delete_file_or_folder(service, file_id):
    """
    Delete the specified file or folder from Google Drive.

    Errors are reported on stdout rather than raised.

    Args:
        service: Drive API service object.
        file_id: Drive ID of the file or folder to delete.

    Returns:
        True if the delete request succeeded, False if it raised. The result
        lets callers tell a failed deletion from a successful one.
    """
    try:
        service.files().delete(fileId=file_id).execute()
    except Exception as e:
        print("Error deleting file/folder:", e)
        return False
    print("Successfully deleted. ID:", file_id)
    return True
211
+
212
+
213
def get_file_metadata(service, file_id):
    """Fetch the id, name and parents fields of the given Drive file or folder."""
    lookup = service.files().get(fileId=file_id, fields="id,name,parents")
    metadata = lookup.execute()
    return metadata
216
+
217
+
218
def main():
    """
    Command-line entry point.

    Parses the arguments, authenticates, archives the target Drive folder
    into a temporary tar.xz file, uploads that file next to the folder (into
    its first parent) and, when --delete-folder is given, deletes the
    original folder. Exits with status 1 if the folder has no parent or the
    archive could not be created.
    """
    parser = argparse.ArgumentParser(
        description="Archive a specified Google Drive folder and replace it with the archive."
    )
    parser.add_argument("--folder-id", required=True, help="Google Drive ID of the target folder")
    parser.add_argument(
        "--credentials",
        default="credentials.json",
        help="OAuth2 credentials file (e.g., credentials.json)",
    )
    parser.add_argument(
        "--archive-name",
        help="Name for the uploaded archive file (e.g., folder_archive.tar.xz). "
        "Defaults to folder name + '.tar.xz'",
    )
    parser.add_argument(
        "--delete-folder",
        action="store_true",
        help="Delete the original folder after archiving",
    )
    args = parser.parse_args()

    # Initialize credentials and Drive API service
    creds = get_credentials(args.credentials)
    service = get_drive_service(creds)

    # Retrieve metadata for the target folder (name, parent folder, etc.)
    folder_meta = get_file_metadata(service, args.folder_id)
    folder_name = folder_meta.get("name", "folder")
    parent_ids = folder_meta.get("parents", [])
    if not parent_ids:
        print("No parent folder found. Cannot process root-level folders.")
        sys.exit(1)
    parent_id = parent_ids[0]

    archive_name = args.archive_name if args.archive_name else f"{folder_name}.tar.xz"
    print("Archive file name:", archive_name)

    # TemporaryDirectory removes the working directory on every exit path,
    # including sys.exit() and an exception raised during the upload.
    with tempfile.TemporaryDirectory() as temp_dir:
        # archive_name is only the name shown in Drive. It may contain "/" or
        # be an absolute path, which would break or escape os.path.join(), so
        # the local file always gets a fixed, safe name.
        archive_path = os.path.join(temp_dir, "archive.tar.xz")
        print("Creating archive at temporary location:", archive_path)

        if not create_archive(service, creds, args.folder_id, archive_path):
            print("Failed to create archive.")
            sys.exit(1)

        print("Archive created successfully. Starting upload...")
        archive_file_id = upload_file(service, archive_path, archive_name, parent_id)
        print("Upload complete. Archive file ID:", archive_file_id)

        # Deletion happens only after the upload call has returned.
        if args.delete_folder:
            print("Deleting original folder as specified...")
            delete_file_or_folder(service, args.folder_id)
        else:
            print("Original folder retained (option --delete-folder not specified).")

    print("Operation completed successfully. Enjoy your productive day!")


if __name__ == "__main__":
    main()
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Taro Furuya
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,140 @@
1
+ Metadata-Version: 2.1
2
+ Name: gdarch
3
+ Version: 0.1.0
4
+ Summary: CLI tool to archive a Google Drive folder and replace it with the archive. Helps to free up Google Drive storage space by compressing rarely accessed folders while keeping them accessible.
5
+ Home-page: https://github.com/taross-f/gdarch
6
+ License: MIT
7
+ Keywords: google-drive,archive,compression,backup,storage-management
8
+ Author: Taro Furuya
9
+ Author-email: taro.furuya@gmail.com
10
+ Requires-Python: >=3.8,<4.0
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: End Users/Desktop
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: System :: Archiving :: Compression
23
+ Classifier: Topic :: Utilities
24
+ Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0)
25
+ Requires-Dist: google-auth (>=2.0.0,<3.0.0)
26
+ Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0)
27
+ Requires-Dist: requests (>=2.25.0,<3.0.0)
28
+ Project-URL: Repository, https://github.com/taross-f/gdarch
29
+ Description-Content-Type: text/markdown
30
+
31
+ # gdarch
32
+
33
+ [![CI](https://github.com/taross-f/gdarch/actions/workflows/ci.yml/badge.svg)](https://github.com/taross-f/gdarch/actions/workflows/ci.yml)
34
+ [![codecov](https://codecov.io/gh/taross-f/gdarch/branch/main/graph/badge.svg)](https://codecov.io/gh/taross-f/gdarch)
35
+
36
+ A CLI tool to archive Google Drive folders and replace them with compressed archives.
37
+
38
+ ## Motivation
39
+
40
+ Google Drive storage space is often filled with large folders that are rarely accessed but need to be kept for reference or backup purposes. This tool helps you free up storage space by:
41
+
42
+ 1. Automatically compressing such folders into high-compression archives
43
+ 2. Replacing the original folders with their compressed versions
44
+ 3. Maintaining the same folder structure and accessibility
45
+
46
+ This way, you can keep your important data while significantly reducing storage usage.
47
+
48
+ ## Features
49
+
50
+ - Recursively downloads all files from a specified Google Drive folder
51
+ - Creates a high-compression tar.xz archive
52
+ - Uploads the archive to the parent folder
53
+ - Optionally deletes the original folder
54
+
55
+ ## Installation
56
+
57
+ ### From PyPI
58
+ ```bash
59
+ pip install gdarch
60
+ ```
61
+
62
+ ### From Source
63
+ ```bash
64
+ # Install Poetry (if not already installed)
65
+ curl -sSL https://install.python-poetry.org | python3 -
66
+
67
+ # Clone and install
68
+ git clone https://github.com/taross-f/gdarch.git
69
+ cd gdarch
70
+ poetry install
71
+ ```
72
+
73
+ ## Usage
74
+
75
+ 1. Get OAuth2 credentials from Google Cloud Console:
76
+ - Visit [Google Cloud Console](https://console.cloud.google.com/)
77
+ - Create or select a project
78
+ - Go to APIs & Services > Credentials
79
+ - Create an OAuth 2.0 Client ID
80
+ - Download the credentials and save as `credentials.json`
81
+
82
+ 2. Run the command:
83
+
84
+ ```bash
85
+ # When installed from PyPI
86
+ gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json
87
+
88
+ # When installed from source (using Poetry)
89
+ poetry run gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json
90
+
91
+ # Archive and delete the original folder
92
+ gdarch --folder-id <TARGET_FOLDER_ID> --credentials credentials.json --delete-folder
93
+
94
+ # Specify a custom archive name
95
+ gdarch --folder-id <TARGET_FOLDER_ID> --archive-name my_archive.tar.xz --credentials credentials.json
96
+ ```
97
+
98
+ ### Options
99
+
100
+ - `--folder-id`: Google Drive folder ID to archive (required)
101
+ - `--credentials`: Path to OAuth2 credentials file (defaults to credentials.json)
102
+ - `--archive-name`: Name for the uploaded archive file (optional)
103
+ - `--delete-folder`: Delete the original folder after archiving (flag)
104
+
105
+ ### Finding Folder ID
106
+
107
+ The folder ID is the last part of the Google Drive folder URL:
108
+ ```
109
+ https://drive.google.com/drive/folders/1234567890abcdef
110
+ ^^^^^^^^^^^^^^^^
111
+ This is your folder ID
112
+ ```
113
+
114
+ ## Development
115
+
116
+ ```bash
117
+ # Install dependencies
118
+ poetry install
119
+
120
+ # Run tests
121
+ poetry run pytest
122
+
123
+ # Format code
124
+ poetry run black .
125
+ poetry run isort .
126
+ ```
127
+
128
+ ## How It Works
129
+
130
+ 1. Authenticates with Google Drive using OAuth2
131
+ 2. Recursively lists all files in the specified folder
132
+ 3. Downloads files while streaming them directly into a tar.xz archive
133
+ 4. Uploads the compressed archive to the parent folder
134
+ 5. Optionally deletes the original folder
135
+ 6. Cleans up temporary files
136
+
137
+ ## License
138
+
139
+ MIT License
140
+
@@ -0,0 +1,7 @@
1
+ gdarch/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
2
+ gdarch/cli.py,sha256=NgSNG3aUpt0PInn5VfqPCJJkh98S7VpvJuVkTGS0IKM,9777
3
+ gdarch-0.1.0.dist-info/LICENSE,sha256=CBYYPBEwlLLNljBvM5zvVymajD7Cd-DLpPyN_f0d8Lk,1068
4
+ gdarch-0.1.0.dist-info/METADATA,sha256=PmvCiiXyxnkoR8JKwE0hIi6A-zgz_gvDyYGOXu7nQBI,4593
5
+ gdarch-0.1.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
6
+ gdarch-0.1.0.dist-info/entry_points.txt,sha256=duJYwjiEX8HSYLwjDLpJ4D1QF2y8XtE7JX6Bvy9Qu4Y,42
7
+ gdarch-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.8.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ gdarch=gdarch.cli:main
3
+