dedrive 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
---
# Dependabot version-update settings for this repository.
# Reference: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates
version: 2
updates:
  # Python ("pip" ecosystem) dependencies, looked up from the repository root
  # and checked on a weekly schedule.
  - package-ecosystem: 'pip'
    directory: '/'
    schedule:
      interval: 'weekly'
@@ -0,0 +1,46 @@
# Release pipeline: after the shared test and PII-scan workflows pass, build
# the package, create a GitHub release for it, and publish it to PyPI.
name: Release

# Runs only for pushes to main that modify dedrive/__init__.py.
on:
  push:
    branches: [main]
    paths: [dedrive/__init__.py]

# All runs share a single concurrency group; a newer run waits instead of
# cancelling a release that is already in progress.
concurrency:
  group: release
  cancel-in-progress: false

jobs:
  # Reusable workflows from tsilva/.github; both must succeed before publish.
  # NOTE(review): these (and the check-version action below) track @main, a
  # mutable ref — consider pinning to a tag or commit SHA.
  test:
    uses: tsilva/.github/.github/workflows/test.yml@main

  pii-scan:
    uses: tsilva/.github/.github/workflows/pii-scan.yml@main

  publish:
    needs: [test, pii-scan]
    runs-on: ubuntu-latest
    timeout-minutes: 15
    environment: pypi
    permissions:
      contents: write  # needed by `gh release create`
      id-token: write  # presumably for PyPI trusted publishing (OIDC): no token is passed to the publish step — confirm
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # fetch full history and tags instead of a shallow clone
      - name: Check version
        id: check-version
        uses: tsilva/.github/.github/actions/check-version@main
      # Every step below is skipped when check-version reports that the tag
      # already exists.
      # NOTE(review): the GitHub release is created before the PyPI upload; if
      # the release also creates the tag and the upload then fails, a re-run
      # would presumably skip publishing — verify against check-version.
      - name: Setup uv
        if: steps.check-version.outputs.tag_exists == 'false'
        uses: astral-sh/setup-uv@v4
        with:
          cache: true
      - name: Build package
        if: steps.check-version.outputs.tag_exists == 'false'
        run: uv build
      - name: Create GitHub release
        if: steps.check-version.outputs.tag_exists == 'false'
        # The tag is handed to the shell through an environment variable rather
        # than expanded into the script text, so its value cannot be
        # interpreted as shell syntax.
        run: gh release create "$TAG" --title "$TAG" --generate-notes dist/*
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TAG: ${{ steps.check-version.outputs.tag }}
      - name: Publish to PyPI
        if: steps.check-version.outputs.tag_exists == 'false'
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,135 @@
# >>> MANAGED BY GITGUARD - DO NOT EDIT THIS SECTION <<<
.claude/*.local.json
.claude/*.local.json.bak
.claude-sandbox.json
.mcp.json
.env
.env.*
!.env.example
!.env.*.example
.env.*.local
.env.local
*.pem
*.key
*.p12
*.pfx
*.gpg
*.secret
*-credentials.json
service-account*.json
credentials.json
secrets.json
.secrets/
.aws/
.ssh/
config.local.*
.DS_Store
Thumbs.db
.idea/
.vscode/
*.swp
*.swo
*.code-workspace
__pycache__/
*.py[cod]
.venv/
venv/
env/
*.egg-info/
node_modules/
.npm/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
dist/
build/
logs/
*.log
# >>> END MANAGED <<<

# Project-specific rules
*$py.class
*.cover
*.egg
*.manifest
*.mo
*.pot
*.py.cover
*.py[codz]
*.sage.py
*.so
*.spec
.abstra/
.cache
.coverage
.coverage.*
.cursorignore
.cursorindexingignore
.dmypy.json
.eggs/
.envrc
.hypothesis/
.installed.cfg
.ipynb_checkpoints
.mypy_cache/
.nox/
.output/*
# Must come after `.output/*`: the last matching pattern wins, so a negation
# placed before it would be overridden and .gitkeep would stay ignored.
!.output/.gitkeep
.pdm-build/
.pdm-python
.pixi
.pybuilder/
.pypirc
.pyre/
.pytest_cache/
.Python
.pytype/
.ropeproject
.ruff_cache/
.scrapy
.spyderproject
.spyproject
.tox/
.venv
.webassets-cache
/site
__marimo__/
__pypackages__/
celerybeat-schedule
celerybeat.pid
cover/
coverage.xml
cython_debug/
db.sqlite3
db.sqlite3-journal
develop-eggs/
dmypy.json
docs/_build/
downloads/
eggs/
env.bak/
ENV/
htmlcov/
instance/
ipython_config.py
lib/
lib64/
local_settings.py
MANIFEST
marimo/_lsp/
marimo/_static/
nosetests.xml
parts/
pip-delete-this-directory.txt
pip-log.txt
profile_default/
profiles/*
sdist/
share/python-wheels/
target/
token.json
var/
venv.bak/
wheels/
# Re-include the example profile; these negations work because they come after
# the `profiles/*` rule above.
!profiles/.example
!profiles/.example/**
@@ -0,0 +1,5 @@
---
# pre-commit configuration: a single hook, `gitleaks`, taken from the
# tsilva/.github repository.
repos:
  # NOTE(review): `rev` names a branch here; pre-commit's documentation asks
  # for an immutable tag or commit SHA — confirm and pin.
  - repo: 'https://github.com/tsilva/.github'
    rev: 'main'
    hooks:
      - id: 'gitleaks'
@@ -0,0 +1,177 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Commands
6
+
7
+ ```bash
8
+ # Install dependencies
9
+ uv sync
10
+
11
+ # Install as standalone CLI tool
12
+ uv tool install . --editable
13
+
14
+ # Login (opens browser, auto-creates profile from email)
15
+ dedrive login
16
+
17
+ # Logout
18
+ dedrive logout
19
+
20
+ # List profiles with login status
21
+ dedrive --list-profiles
22
+
23
+ # Launch the Gradio web UI (auto-detects logged-in profile)
24
+ dedrive
25
+
26
+ # Launch on a custom port
27
+ dedrive --port 8080
28
+
29
+ # Enable public sharing link
30
+ dedrive --share
31
+
32
+ # Enable verbose/debug logging
33
+ dedrive --verbose
34
+
35
+ # Write logs to file
36
+ dedrive --log-file debug.log
37
+
38
+ # Validate credentials without launching UI
39
+ dedrive --profile work --validate
40
+
41
+ # Backward-compatible: run via main.py
42
+ uv run main.py login
43
+ uv run main.py --list-profiles
44
+ ```
45
+
46
+ **Note:** PDF preview in the web UI requires poppler: `brew install poppler` (macOS)
47
+
48
+ ## Configuration
49
+
50
+ All settings can be configured via environment variables, `config.json`, or profiles. Precedence: CLI args (where available) > profile `config.yaml` > ENV > `config.json` > defaults.
51
+
52
+ ### Environment Variables
53
+
54
+ | Variable | Default | Description |
55
+ |----------|---------|-------------|
56
+ | `GDRIVE_CREDENTIALS_PATH` | `credentials.json` | Path to OAuth credentials file |
57
+ | `GDRIVE_TOKEN_PATH` | (next to credentials) | Path to OAuth token file |
58
+ | `GDRIVE_OUTPUT_DIR` | `.output` | Directory for output files |
59
+ | `GDRIVE_DUPES_FOLDER` | `/_dupes` | Folder name for duplicates in Drive |
60
+ | `GDRIVE_BATCH_SIZE` | `100` | Batch size for API operations |
61
+ | `GDRIVE_MAX_PREVIEW_MB` | `10` | Max file size for previews (MB) |
62
+ | `GDRIVE_EXCLUDE_PATHS` | (none) | Comma-separated paths to exclude |
63
+
64
+ ### Config File
65
+
66
+ Create `config.json` in the project root:
67
+
68
+ ```json
69
+ {
70
+ "credentials_path": "~/.config/dedrive/credentials.json",
71
+ "output_dir": "~/.local/share/dedrive",
72
+ "dupes_folder": "/_dupes",
73
+ "batch_size": 100,
74
+ "max_preview_mb": 10,
75
+ "exclude_paths": [
76
+ "/Backup/Old",
77
+ "/tmp"
78
+ ]
79
+ }
80
+ ```
81
+
82
+ Paths support `~` expansion for home directory.
83
+
84
+ ### Exclude Paths
85
+
86
+ Folders can be excluded from scans in two ways; paths from both sources are combined:
87
+
88
+ 1. **Config file:** Add `exclude_paths` array to `config.json`
89
+ 2. **Environment variable:** Set `GDRIVE_EXCLUDE_PATHS` (comma-separated)
90
+
91
+ Example `.env` file:
92
+ ```
93
+ GDRIVE_EXCLUDE_PATHS=/documentor-puzzle/export,/Backup/Old
94
+ ```
95
+
96
+ ### Config File Fallback
97
+
98
+ `config.json` is checked in cwd first, then falls back to `~/.dedrive/config.json`.
99
+
100
+ ### Credentials Fallback
101
+
102
+ `credentials.json` is checked in cwd first, then falls back to `~/.dedrive/credentials.json`.
103
+
104
+ ### Profiles
105
+
106
+ Profiles allow targeting multiple Google Drive accounts. Each profile is a subfolder under `~/.dedrive/` with its own credentials, token, config, and output data. Profiles are auto-created on `dedrive login` using the Google account email as the profile name.
107
+
108
+ ```
109
+ ~/.dedrive/
110
+ credentials.json # Shared OAuth client credentials (fallback)
111
+ config.json # Shared config (fallback)
112
+ user@gmail.com/
113
+ config.yaml # Profile settings (YAML)
114
+ token.json # OAuth token (auto-generated)
115
+ .output/ # Scan results, decisions, logs
116
+ ```
117
+
118
+ Example `config.yaml`:
119
+
120
+ ```yaml
121
+ dupes_folder: /_dupes
122
+ batch_size: 100
123
+ max_preview_mb: 10
124
+ exclude_paths:
125
+ - /Backup/Old
126
+ - /tmp
127
+ ```
128
+
129
+ When `--profile <name>` is used, `config.py` resolves credentials, token, and output paths from the profile directory. Profile `config.yaml` values slot into the precedence chain between CLI args and environment variables.
130
+
131
+ ## Architecture
132
+
133
+ Installable CLI tool (`dedrive`) with a Gradio web UI for finding and managing duplicate files in Google Drive.
134
+
135
+ **Package structure:**
136
+ - `dedrive/cli.py` — CLI entry point with `login`, `logout` subcommands and Gradio launcher
137
+ - `dedrive/ui.py` — Gradio web UI (all UI code, dataclasses, helpers)
138
+ - `dedrive/drive.py` — Google Drive authentication, API fetch, path resolution
139
+ - `dedrive/dedup.py` — Duplicate detection logic
140
+ - `dedrive/config.py` — Configuration management with fallback chain
141
+ - `dedrive/profiles.py` — Profile management (profiles stored in `~/.dedrive/`)
142
+ - `main.py` — Thin wrapper for backward compatibility (`uv run main.py`)
143
+
144
+ **Features:**
145
+ - **CLI:** `login` (browser-based OAuth), `logout` (remove token), `--list-profiles`
146
+ - **Scan Tab:** Run scans with progress feedback
147
+ - **Review Tab:** Side-by-side file comparison with previews, make keep/skip decisions
148
+ - **Export Tab:** Preview and execute moves, export decisions.json
149
+ - **CLI flags:** Profile selection (`--profile`), credential validation (`--validate`), logging (`--verbose`, `--log-file`), Gradio options (`--port`, `--share`)
150
+
151
+ **Key design decisions:**
152
+ - Profiles auto-created on login using Google account email as name
153
+ - Profiles stored in `~/.dedrive/` (works when installed as standalone CLI)
154
+ - `login` subcommand opens browser for OAuth without importing Gradio
155
+ - Uses `drive` scope (full access for file moves)
156
+ - Fetches all files in one query then filters locally (faster than recursive folder traversal)
157
+ - Path resolution uses memoization (`path_cache`) for efficiency
158
+ - Files with same MD5 but different size marked as "uncertain"
159
+ - Google Workspace files (Docs, Sheets) skipped (no MD5 available)
160
+ - Decisions auto-save to `.output/decisions.json` (resume sessions)
161
+ - File previews cached in `.output/preview_cache/`
162
+
163
+ **Output:** `.output/scan_results.json` (scan results), `.output/decisions.json` (user decisions), `.output/execution_log.json` (move results)
164
+
165
+ ### Moving Duplicates
166
+
167
+ Instead of deleting duplicates, files are moved to a `/_dupes` folder at the root of your Google Drive:
168
+
169
+ 1. **Scan** your drive to find duplicates
170
+ 2. **Review** and mark which files to keep
171
+ 3. **Preview (Dry Run)** to see what would be moved
172
+ 4. **Execute** to move duplicates to `/_dupes`
173
+
174
+ The original folder structure is preserved under `/_dupes`:
175
+ - `/Photos/2024/IMG.jpg` → `/_dupes/Photos/2024/IMG.jpg`
176
+
177
+ **Re-authentication required:** If you previously used this tool with read-only access, delete `token.json` and re-authenticate to grant move permissions.
@@ -0,0 +1 @@
1
+ AGENTS.md
dedrive-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Tiago Silva
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
dedrive-0.1.0/Makefile ADDED
@@ -0,0 +1,5 @@
# `make release-<arg>`: pass <arg> (the pattern stem, `$*`) to `hatch version`,
# stage dedrive/__init__.py, commit it with the version that `hatch version`
# then prints, and push. The Release workflow listens for pushes to main that
# touch dedrive/__init__.py.
release-%:
	hatch version $*
	git add dedrive/__init__.py
	git commit -m "chore: release $$(hatch version)"
	git push
dedrive-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,224 @@
1
+ Metadata-Version: 2.4
2
+ Name: dedrive
3
+ Version: 0.1.0
4
+ Summary: Find duplicate files in Google Drive using MD5 checksums
5
+ Project-URL: Homepage, https://github.com/tsilva/dedrive
6
+ Project-URL: Repository, https://github.com/tsilva/dedrive
7
+ Project-URL: Issues, https://github.com/tsilva/dedrive/issues
8
+ Author-email: Tiago Silva <tiago@tsilva.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: cleanup,deduplication,duplicates,google-drive,md5,storage
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Environment :: Web Environment
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: End Users/Desktop
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: System :: Filesystems
24
+ Classifier: Topic :: Utilities
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: google-api-python-client
27
+ Requires-Dist: google-auth-httplib2
28
+ Requires-Dist: google-auth-oauthlib
29
+ Requires-Dist: gradio>=4.0.0
30
+ Requires-Dist: pdf2image>=1.16.0
31
+ Requires-Dist: python-dotenv>=1.0.0
32
+ Requires-Dist: pyyaml>=6.0
33
+ Provides-Extra: dev
34
+ Requires-Dist: mypy>=1.10.0; extra == 'dev'
35
+ Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
36
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
37
+ Requires-Dist: ruff>=0.5.0; extra == 'dev'
38
+ Description-Content-Type: text/markdown
39
+
40
+ <div align="center">
41
+ <img src="logo.png" alt="dedrive" width="512"/>
42
+
43
+ [![Python](https://img.shields.io/badge/Python-3.10+-3776AB?logo=python&logoColor=white)](https://python.org)
44
+ [![License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
45
+ [![Google Drive API](https://img.shields.io/badge/Google%20Drive-API%20v3-4285F4?logo=googledrive&logoColor=white)](https://developers.google.com/drive)
46
+
47
+ **🔍 Find and manage duplicate files in Google Drive using MD5 checksums 🗑️**
48
+
49
+ [Features](#features) · [Quick Start](#quick-start) · [Configuration](#configuration) · [Usage](#usage)
50
+ </div>
51
+
52
+ ---
53
+
54
+ ## Features
55
+
56
+ - **Fast MD5-based detection** - Identifies duplicates by comparing file checksums, not just names
57
+ - **Two interfaces** - CLI for quick scans, Web UI for interactive review with file previews
58
+ - **Non-destructive** - Moves duplicates to `/_dupes` folder instead of deleting them
59
+ - **Preserves structure** - Original folder hierarchy is maintained under the dupes folder
60
+ - **Resumable sessions** - Decisions auto-save and persist across sessions
61
+ - **Flexible filtering** - Scan specific paths and exclude folders from analysis
62
+ - **Multi-account profiles** - Target different Google Drive accounts with named profiles
63
+
64
+ ## Quick Start
65
+
66
+ ```bash
67
+ # Install with uv
68
+ uv sync
69
+
70
+ # Launch the web UI
71
+ uv run main.py
72
+ ```
73
+
74
+ **First run:** A browser window will open for Google OAuth authentication. Grant access to your Google Drive.
75
+
76
+ ## Installation
77
+
78
+ ### Prerequisites
79
+
80
+ - Python 3.10+
81
+ - [uv](https://docs.astral.sh/uv/) package manager
82
+ - Google Cloud OAuth credentials ([setup guide](#google-cloud-setup))
83
+
84
+ ### Google Cloud Setup
85
+
86
+ 1. Go to [Google Cloud Console](https://console.cloud.google.com/apis/credentials)
87
+ 2. Create a project (or select existing)
88
+ 3. Enable the **Google Drive API**
89
+ 4. Create **OAuth 2.0 Client ID** (choose "Desktop app")
90
+ 5. Download the JSON file and save as `credentials.json` in the project root
91
+
92
+ ## Usage
93
+
94
+ ### CLI Tool
95
+
96
+ ```bash
97
+ # Scan entire drive
98
+ uv run main.py
99
+
100
+ # Scan specific folder
101
+ uv run main.py --path "/Photos"
102
+
103
+ # Exclude folders
104
+ uv run main.py --exclude "/Backup/Old" --exclude "/tmp"
105
+
106
+ # Custom output location
107
+ uv run main.py --output results.csv
108
+
109
+ # Validate credentials
110
+ uv run main.py --validate
111
+
112
+ # Debug logging
113
+ uv run main.py --verbose --log-file debug.log
114
+
115
+ # Profiles (multiple Google accounts)
116
+ uv run main.py --init-profile work
117
+ uv run main.py --list-profiles
118
+ uv run main.py --profile work
119
+ ```
120
+
121
+ ### Web UI
122
+
123
+ ```bash
124
+ uv run main.py
125
+ ```
126
+
127
+ The web interface provides three tabs:
128
+
129
+ | Tab | Purpose |
130
+ |-----|---------|
131
+ | **Scan** | Run scans with path filtering and progress feedback |
132
+ | **Review** | Side-by-side comparison with file previews, make keep/skip decisions |
133
+ | **Export** | Preview moves (dry run), execute moves, export decisions to JSON |
134
+
135
+ **Note:** PDF preview requires poppler: `brew install poppler` (macOS)
136
+
137
+ ### Moving Duplicates
138
+
139
+ Instead of deleting, duplicates are moved to `/_dupes` at Drive root:
140
+
141
+ ```
142
+ /Photos/2024/IMG.jpg → /_dupes/Photos/2024/IMG.jpg
143
+ ```
144
+
145
+ 1. **Scan** - Find duplicates
146
+ 2. **Review** - Mark which files to keep
147
+ 3. **Preview** - Dry run to see what would move
148
+ 4. **Execute** - Move duplicates to `/_dupes`
149
+
150
+ ## Configuration
151
+
152
+ Settings can be configured via CLI arguments, a profile's `config.yaml`, environment variables, or `config.json`.
153
+
154
+ **Precedence:** CLI > Profile config.yaml > Environment > Config file > Defaults
155
+
156
+ ### Environment Variables
157
+
158
+ | Variable | Default | Description |
159
+ |----------|---------|-------------|
160
+ | `GDRIVE_CREDENTIALS_PATH` | `credentials.json` | OAuth credentials file |
161
+ | `GDRIVE_TOKEN_PATH` | (next to credentials) | OAuth token file |
162
+ | `GDRIVE_OUTPUT_DIR` | `.output` | Output directory |
163
+ | `GDRIVE_DUPES_FOLDER` | `/_dupes` | Folder for duplicates |
164
+ | `GDRIVE_BATCH_SIZE` | `100` | Batch size for API operations |
165
+ | `GDRIVE_MAX_PREVIEW_MB` | `10` | Max file size for previews (MB) |
166
+ | `GDRIVE_EXCLUDE_PATHS` | (none) | Comma-separated paths to exclude |
167
+
168
+ ### Config File
169
+
170
+ Create `config.json` in the project root:
171
+
172
+ ```json
173
+ {
174
+ "credentials_path": "~/.config/dedrive/credentials.json",
175
+ "output_dir": "~/.local/share/dedrive",
176
+ "dupes_folder": "/_dupes",
177
+ "batch_size": 100,
178
+ "exclude_paths": ["/Backup/Old", "/tmp"]
179
+ }
180
+ ```
181
+
182
+ ### Profiles
183
+
184
+ Use profiles to manage multiple Google Drive accounts:
185
+
186
+ ```bash
187
+ # Create a profile
188
+ uv run main.py --init-profile work
189
+
190
+ # Copy credentials into the profile
191
+ cp ~/Downloads/credentials.json profiles/work/
192
+
193
+ # Use the profile
194
+ uv run main.py --profile work
195
+ ```
196
+
197
+ Each profile stores its own `credentials.json`, `token.json`, `config.yaml`, and `.output/` under `profiles/<name>/`.
198
+
199
+ ## Output Files
200
+
201
+ | File | Description |
202
+ |------|-------------|
203
+ | `.output/duplicates.csv` | Scan results with duplicate pairs |
204
+ | `.output/decisions.json` | User decisions (auto-saved) |
205
+ | `.output/execution_log.json` | Move operation results |
206
+ | `.output/scan_results.json` | Cached scan data for session resume |
207
+
208
+ ## How It Works
209
+
210
+ 1. **OAuth authentication** - Cached in `token.json` after first login
211
+ 2. **Single query** - Fetches all files with MD5 metadata using one paginated query instead of traversing folders recursively
212
+ 3. **In-memory path resolution** - Builds paths from parent IDs with memoization
213
+ 4. **MD5 grouping** - Groups files by checksum to identify duplicates
214
+ 5. **Size validation** - Files with same MD5 but different sizes flagged as "uncertain"
215
+
216
+ **Note:** Google Workspace files (Docs, Sheets, Slides) are skipped as they don't have MD5 checksums.
217
+
218
+ ## Re-authentication
219
+
220
+ If you previously used this tool with read-only access, delete `token.json` and re-authenticate to grant move permissions.
221
+
222
+ ## License
223
+
224
+ MIT