dedrive 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dedrive-0.1.0/.github/dependabot.yml +8 -0
- dedrive-0.1.0/.github/workflows/release.yml +46 -0
- dedrive-0.1.0/.gitignore +135 -0
- dedrive-0.1.0/.pre-commit-config.yaml +5 -0
- dedrive-0.1.0/AGENTS.md +177 -0
- dedrive-0.1.0/CLAUDE.md +1 -0
- dedrive-0.1.0/LICENSE +21 -0
- dedrive-0.1.0/Makefile +5 -0
- dedrive-0.1.0/PKG-INFO +224 -0
- dedrive-0.1.0/README.md +185 -0
- dedrive-0.1.0/dedrive/__init__.py +84 -0
- dedrive-0.1.0/dedrive/cli.py +259 -0
- dedrive-0.1.0/dedrive/config.py +284 -0
- dedrive-0.1.0/dedrive/dedup.py +156 -0
- dedrive-0.1.0/dedrive/drive.py +347 -0
- dedrive-0.1.0/dedrive/profiles.py +78 -0
- dedrive-0.1.0/dedrive/ui.py +1450 -0
- dedrive-0.1.0/logo.png +0 -0
- dedrive-0.1.0/main.py +7 -0
- dedrive-0.1.0/pyproject.toml +63 -0
- dedrive-0.1.0/uv.lock +2320 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
branches: [main]
|
|
5
|
+
paths: [dedrive/__init__.py]
|
|
6
|
+
concurrency:
|
|
7
|
+
group: release
|
|
8
|
+
cancel-in-progress: false
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
uses: tsilva/.github/.github/workflows/test.yml@main
|
|
12
|
+
|
|
13
|
+
pii-scan:
|
|
14
|
+
uses: tsilva/.github/.github/workflows/pii-scan.yml@main
|
|
15
|
+
|
|
16
|
+
publish:
|
|
17
|
+
needs: [test, pii-scan]
|
|
18
|
+
runs-on: ubuntu-latest
|
|
19
|
+
timeout-minutes: 15
|
|
20
|
+
environment: pypi
|
|
21
|
+
permissions:
|
|
22
|
+
contents: write
|
|
23
|
+
id-token: write
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
with:
|
|
27
|
+
fetch-depth: 0
|
|
28
|
+
- name: Check version
|
|
29
|
+
id: check-version
|
|
30
|
+
uses: tsilva/.github/.github/actions/check-version@main
|
|
31
|
+
- name: Setup uv
|
|
32
|
+
if: steps.check-version.outputs.tag_exists == 'false'
|
|
33
|
+
uses: astral-sh/setup-uv@v4
|
|
34
|
+
with:
|
|
35
|
+
cache: true
|
|
36
|
+
- name: Build package
|
|
37
|
+
if: steps.check-version.outputs.tag_exists == 'false'
|
|
38
|
+
run: uv build
|
|
39
|
+
- name: Create GitHub release
|
|
40
|
+
if: steps.check-version.outputs.tag_exists == 'false'
|
|
41
|
+
run: gh release create "${{ steps.check-version.outputs.tag }}" --title "${{ steps.check-version.outputs.tag }}" --generate-notes dist/*
|
|
42
|
+
env:
|
|
43
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
44
|
+
- name: Publish to PyPI
|
|
45
|
+
if: steps.check-version.outputs.tag_exists == 'false'
|
|
46
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
dedrive-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# >>> MANAGED BY GITGUARD - DO NOT EDIT THIS SECTION <<<
|
|
2
|
+
.claude/*.local.json
|
|
3
|
+
.claude/*.local.json.bak
|
|
4
|
+
.claude-sandbox.json
|
|
5
|
+
.mcp.json
|
|
6
|
+
.env
|
|
7
|
+
.env.*
|
|
8
|
+
!.env.example
|
|
9
|
+
!.env.*.example
|
|
10
|
+
.env.*.local
|
|
11
|
+
.env.local
|
|
12
|
+
*.pem
|
|
13
|
+
*.key
|
|
14
|
+
*.p12
|
|
15
|
+
*.pfx
|
|
16
|
+
*.gpg
|
|
17
|
+
*.secret
|
|
18
|
+
*-credentials.json
|
|
19
|
+
service-account*.json
|
|
20
|
+
credentials.json
|
|
21
|
+
secrets.json
|
|
22
|
+
.secrets/
|
|
23
|
+
.aws/
|
|
24
|
+
.ssh/
|
|
25
|
+
config.local.*
|
|
26
|
+
.DS_Store
|
|
27
|
+
Thumbs.db
|
|
28
|
+
.idea/
|
|
29
|
+
.vscode/
|
|
30
|
+
*.swp
|
|
31
|
+
*.swo
|
|
32
|
+
*.code-workspace
|
|
33
|
+
__pycache__/
|
|
34
|
+
*.py[cod]
|
|
35
|
+
.venv/
|
|
36
|
+
venv/
|
|
37
|
+
env/
|
|
38
|
+
*.egg-info/
|
|
39
|
+
node_modules/
|
|
40
|
+
.npm/
|
|
41
|
+
npm-debug.log*
|
|
42
|
+
yarn-debug.log*
|
|
43
|
+
yarn-error.log*
|
|
44
|
+
dist/
|
|
45
|
+
build/
|
|
46
|
+
logs/
|
|
47
|
+
*.log
|
|
48
|
+
# >>> END MANAGED <<<
|
|
49
|
+
|
|
50
|
+
# Project-specific rules
|
|
51
|
+
!.output/.gitkeep
|
|
52
|
+
*$py.class
|
|
53
|
+
*.cover
|
|
54
|
+
*.egg
|
|
55
|
+
*.manifest
|
|
56
|
+
*.mo
|
|
57
|
+
*.pot
|
|
58
|
+
*.py.cover
|
|
59
|
+
*.py[codz]
|
|
60
|
+
*.sage.py
|
|
61
|
+
*.so
|
|
62
|
+
*.spec
|
|
63
|
+
.abstra/
|
|
64
|
+
.cache
|
|
65
|
+
.coverage
|
|
66
|
+
.coverage.*
|
|
67
|
+
.cursorignore
|
|
68
|
+
.cursorindexingignore
|
|
69
|
+
.dmypy.json
|
|
70
|
+
.eggs/
|
|
71
|
+
.envrc
|
|
72
|
+
.hypothesis/
|
|
73
|
+
.installed.cfg
|
|
74
|
+
.ipynb_checkpoints
|
|
75
|
+
.mypy_cache/
|
|
76
|
+
.nox/
|
|
77
|
+
.output/*
|
|
78
|
+
.pdm-build/
|
|
79
|
+
.pdm-python
|
|
80
|
+
.pixi
|
|
81
|
+
.pybuilder/
|
|
82
|
+
.pypirc
|
|
83
|
+
.pyre/
|
|
84
|
+
.pytest_cache/
|
|
85
|
+
.Python
|
|
86
|
+
.pytype/
|
|
87
|
+
.ropeproject
|
|
88
|
+
.ruff_cache/
|
|
89
|
+
.scrapy
|
|
90
|
+
.spyderproject
|
|
91
|
+
.spyproject
|
|
92
|
+
.tox/
|
|
93
|
+
.venv
|
|
94
|
+
.webassets-cache
|
|
95
|
+
/site
|
|
96
|
+
__marimo__/
|
|
97
|
+
__pypackages__/
|
|
98
|
+
celerybeat-schedule
|
|
99
|
+
celerybeat.pid
|
|
100
|
+
cover/
|
|
101
|
+
coverage.xml
|
|
102
|
+
cython_debug/
|
|
103
|
+
db.sqlite3
|
|
104
|
+
db.sqlite3-journal
|
|
105
|
+
develop-eggs/
|
|
106
|
+
dmypy.json
|
|
107
|
+
docs/_build/
|
|
108
|
+
downloads/
|
|
109
|
+
eggs/
|
|
110
|
+
env.bak/
|
|
111
|
+
ENV/
|
|
112
|
+
htmlcov/
|
|
113
|
+
instance/
|
|
114
|
+
ipython_config.py
|
|
115
|
+
lib/
|
|
116
|
+
lib64/
|
|
117
|
+
local_settings.py
|
|
118
|
+
MANIFEST
|
|
119
|
+
marimo/_lsp/
|
|
120
|
+
marimo/_static/
|
|
121
|
+
nosetests.xml
|
|
122
|
+
parts/
|
|
123
|
+
pip-delete-this-directory.txt
|
|
124
|
+
pip-log.txt
|
|
125
|
+
profile_default/
|
|
126
|
+
profiles/*
|
|
127
|
+
sdist/
|
|
128
|
+
share/python-wheels/
|
|
129
|
+
target/
|
|
130
|
+
token.json
|
|
131
|
+
var/
|
|
132
|
+
venv.bak/
|
|
133
|
+
wheels/
|
|
134
|
+
!profiles/.example
|
|
135
|
+
!profiles/.example/**
|
dedrive-0.1.0/AGENTS.md
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Commands
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Install dependencies
|
|
9
|
+
uv sync
|
|
10
|
+
|
|
11
|
+
# Install as standalone CLI tool
|
|
12
|
+
uv tool install . --editable
|
|
13
|
+
|
|
14
|
+
# Login (opens browser, auto-creates profile from email)
|
|
15
|
+
dedrive login
|
|
16
|
+
|
|
17
|
+
# Logout
|
|
18
|
+
dedrive logout
|
|
19
|
+
|
|
20
|
+
# List profiles with login status
|
|
21
|
+
dedrive --list-profiles
|
|
22
|
+
|
|
23
|
+
# Launch the Gradio web UI (auto-detects logged-in profile)
|
|
24
|
+
dedrive
|
|
25
|
+
|
|
26
|
+
# Launch on a custom port
|
|
27
|
+
dedrive --port 8080
|
|
28
|
+
|
|
29
|
+
# Enable public sharing link
|
|
30
|
+
dedrive --share
|
|
31
|
+
|
|
32
|
+
# Enable verbose/debug logging
|
|
33
|
+
dedrive --verbose
|
|
34
|
+
|
|
35
|
+
# Write logs to file
|
|
36
|
+
dedrive --log-file debug.log
|
|
37
|
+
|
|
38
|
+
# Validate credentials without launching UI
|
|
39
|
+
dedrive --profile work --validate
|
|
40
|
+
|
|
41
|
+
# Backward-compatible: run via main.py
|
|
42
|
+
uv run main.py login
|
|
43
|
+
uv run main.py --list-profiles
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Note:** PDF preview in the web UI requires poppler: `brew install poppler` (macOS)
|
|
47
|
+
|
|
48
|
+
## Configuration
|
|
49
|
+
|
|
50
|
+
All settings can be configured via CLI arguments, environment variables, `config.json`, or profiles. Precedence: CLI args > profile config.yaml > ENV > config.json > defaults.
|
|
51
|
+
|
|
52
|
+
### Environment Variables
|
|
53
|
+
|
|
54
|
+
| Variable | Default | Description |
|
|
55
|
+
|----------|---------|-------------|
|
|
56
|
+
| `GDRIVE_CREDENTIALS_PATH` | `credentials.json` | Path to OAuth credentials file |
|
|
57
|
+
| `GDRIVE_TOKEN_PATH` | (next to credentials) | Path to OAuth token file |
|
|
58
|
+
| `GDRIVE_OUTPUT_DIR` | `.output` | Directory for output files |
|
|
59
|
+
| `GDRIVE_DUPES_FOLDER` | `/_dupes` | Folder name for duplicates in Drive |
|
|
60
|
+
| `GDRIVE_BATCH_SIZE` | `100` | Batch size for API operations |
|
|
61
|
+
| `GDRIVE_MAX_PREVIEW_MB` | `10` | Max file size for previews (MB) |
|
|
62
|
+
| `GDRIVE_EXCLUDE_PATHS` | (none) | Comma-separated paths to exclude |
|
|
63
|
+
|
|
64
|
+
### Config File
|
|
65
|
+
|
|
66
|
+
Create `config.json` in the project root:
|
|
67
|
+
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"credentials_path": "~/.config/dedrive/credentials.json",
|
|
71
|
+
"output_dir": "~/.local/share/dedrive",
|
|
72
|
+
"dupes_folder": "/_dupes",
|
|
73
|
+
"batch_size": 100,
|
|
74
|
+
"max_preview_mb": 10,
|
|
75
|
+
"exclude_paths": [
|
|
76
|
+
"/Backup/Old",
|
|
77
|
+
"/tmp"
|
|
78
|
+
]
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Paths support `~` expansion for home directory.
|
|
83
|
+
|
|
84
|
+
### Exclude Paths
|
|
85
|
+
|
|
86
|
+
Folders can be excluded from scans using two methods (paths from both sources are combined):
|
|
87
|
+
|
|
88
|
+
1. **Config file:** Add `exclude_paths` array to `config.json`
|
|
89
|
+
2. **Environment variable:** Set `GDRIVE_EXCLUDE_PATHS` (comma-separated)
|
|
90
|
+
|
|
91
|
+
Example `.env` file:
|
|
92
|
+
```
|
|
93
|
+
GDRIVE_EXCLUDE_PATHS=/documentor-puzzle/export,/Backup/Old
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Config File Fallback
|
|
97
|
+
|
|
98
|
+
`config.json` is looked up in the current working directory first; if it is not found there, `~/.dedrive/config.json` is used.
|
|
99
|
+
|
|
100
|
+
### Credentials Fallback
|
|
101
|
+
|
|
102
|
+
`credentials.json` is looked up in the current working directory first; if it is not found there, `~/.dedrive/credentials.json` is used.
|
|
103
|
+
|
|
104
|
+
### Profiles
|
|
105
|
+
|
|
106
|
+
Profiles allow targeting multiple Google Drive accounts. Each profile is a subfolder under `~/.dedrive/` with its own credentials, token, config, and output data. Profiles are auto-created on `dedrive login` using the Google account email as the profile name.
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
~/.dedrive/
|
|
110
|
+
credentials.json # Shared OAuth client credentials (fallback)
|
|
111
|
+
config.json # Shared config (fallback)
|
|
112
|
+
user@gmail.com/
|
|
113
|
+
config.yaml # Profile settings (YAML)
|
|
114
|
+
token.json # OAuth token (auto-generated)
|
|
115
|
+
.output/ # Scan results, decisions, logs
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Example `config.yaml`:
|
|
119
|
+
|
|
120
|
+
```yaml
|
|
121
|
+
dupes_folder: /_dupes
|
|
122
|
+
batch_size: 100
|
|
123
|
+
max_preview_mb: 10
|
|
124
|
+
exclude_paths:
|
|
125
|
+
- /Backup/Old
|
|
126
|
+
- /tmp
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
When `--profile <name>` is used, `config.py` resolves credentials, token, and output paths from the profile directory. Profile `config.yaml` values slot into the precedence chain between CLI args and environment variables.
|
|
130
|
+
|
|
131
|
+
## Architecture
|
|
132
|
+
|
|
133
|
+
Installable CLI tool (`dedrive`) with a Gradio web UI for finding and managing duplicate files in Google Drive.
|
|
134
|
+
|
|
135
|
+
**Package structure:**
|
|
136
|
+
- `dedrive/cli.py` — CLI entry point with `login`, `logout` subcommands and Gradio launcher
|
|
137
|
+
- `dedrive/ui.py` — Gradio web UI (all UI code, dataclasses, helpers)
|
|
138
|
+
- `dedrive/drive.py` — Google Drive authentication, API fetch, path resolution
|
|
139
|
+
- `dedrive/dedup.py` — Duplicate detection logic
|
|
140
|
+
- `dedrive/config.py` — Configuration management with fallback chain
|
|
141
|
+
- `dedrive/profiles.py` — Profile management (profiles stored in `~/.dedrive/`)
|
|
142
|
+
- `main.py` — Thin wrapper for backward compatibility (`uv run main.py`)
|
|
143
|
+
|
|
144
|
+
**Features:**
|
|
145
|
+
- **CLI:** `login` (browser-based OAuth), `logout` (remove token), `--list-profiles`
|
|
146
|
+
- **Scan Tab:** Run scans with progress feedback
|
|
147
|
+
- **Review Tab:** Side-by-side file comparison with previews, make keep/skip decisions
|
|
148
|
+
- **Export Tab:** Preview and execute moves, export decisions.json
|
|
149
|
+
- **CLI flags:** Profile selection (`--profile`), credential validation (`--validate`), logging (`--verbose`, `--log-file`), Gradio options (`--port`, `--share`)
|
|
150
|
+
|
|
151
|
+
**Key design decisions:**
|
|
152
|
+
- Profiles auto-created on login using Google account email as name
|
|
153
|
+
- Profiles stored in `~/.dedrive/` (works when installed as standalone CLI)
|
|
154
|
+
- `login` subcommand opens browser for OAuth without importing Gradio
|
|
155
|
+
- Uses `drive` scope (full access for file moves)
|
|
156
|
+
- Fetches all files in one query then filters locally (faster than recursive folder traversal)
|
|
157
|
+
- Path resolution uses memoization (`path_cache`) for efficiency
|
|
158
|
+
- Files with the same MD5 but different sizes are marked as "uncertain"
|
|
159
|
+
- Google Workspace files (Docs, Sheets) skipped (no MD5 available)
|
|
160
|
+
- Decisions auto-save to `.output/decisions.json` (resume sessions)
|
|
161
|
+
- File previews cached in `.output/preview_cache/`
|
|
162
|
+
|
|
163
|
+
**Output:** `.output/scan_results.json` (scan results), `.output/decisions.json` (user decisions), `.output/execution_log.json` (move results)
|
|
164
|
+
|
|
165
|
+
### Moving Duplicates
|
|
166
|
+
|
|
167
|
+
Instead of being deleted, duplicate files are moved to a `/_dupes` folder at the root of your Google Drive:
|
|
168
|
+
|
|
169
|
+
1. **Scan** your drive to find duplicates
|
|
170
|
+
2. **Review** and mark which files to keep
|
|
171
|
+
3. **Preview (Dry Run)** to see what would be moved
|
|
172
|
+
4. **Execute** to move duplicates to `/_dupes`
|
|
173
|
+
|
|
174
|
+
The original folder structure is preserved under `/_dupes`:
|
|
175
|
+
- `/Photos/2024/IMG.jpg` → `/_dupes/Photos/2024/IMG.jpg`
|
|
176
|
+
|
|
177
|
+
**Re-authentication required:** If you previously used this tool with read-only access, delete `token.json` and re-authenticate to grant move permissions.
|
dedrive-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
AGENTS.md
|
dedrive-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Tiago Silva
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dedrive-0.1.0/Makefile
ADDED
dedrive-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dedrive
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Find duplicate files in Google Drive using MD5 checksums
|
|
5
|
+
Project-URL: Homepage, https://github.com/tsilva/dedrive
|
|
6
|
+
Project-URL: Repository, https://github.com/tsilva/dedrive
|
|
7
|
+
Project-URL: Issues, https://github.com/tsilva/dedrive/issues
|
|
8
|
+
Author-email: Tiago Silva <tiago@tsilva.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: cleanup,deduplication,duplicates,google-drive,md5,storage
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Environment :: Web Environment
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Topic :: System :: Filesystems
|
|
24
|
+
Classifier: Topic :: Utilities
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Requires-Dist: google-api-python-client
|
|
27
|
+
Requires-Dist: google-auth-httplib2
|
|
28
|
+
Requires-Dist: google-auth-oauthlib
|
|
29
|
+
Requires-Dist: gradio>=4.0.0
|
|
30
|
+
Requires-Dist: pdf2image>=1.16.0
|
|
31
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
32
|
+
Requires-Dist: pyyaml>=6.0
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: mypy>=1.10.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: ruff>=0.5.0; extra == 'dev'
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
|
|
40
|
+
<div align="center">
|
|
41
|
+
<img src="logo.png" alt="dedrive" width="512"/>
|
|
42
|
+
|
|
43
|
+
[Python 3.10+](https://python.org)
|
|
44
|
+
[License: MIT](LICENSE)
|
|
45
|
+
[Google Drive API](https://developers.google.com/drive)
|
|
46
|
+
|
|
47
|
+
**🔍 Find and manage duplicate files in Google Drive using MD5 checksums 🗑️**
|
|
48
|
+
|
|
49
|
+
[Features](#features) · [Quick Start](#quick-start) · [Configuration](#configuration) · [Usage](#usage)
|
|
50
|
+
</div>
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Features
|
|
55
|
+
|
|
56
|
+
- **Fast MD5-based detection** - Identifies duplicates by comparing file checksums, not just names
|
|
57
|
+
- **Two interfaces** - CLI for quick scans, Web UI for interactive review with file previews
|
|
58
|
+
- **Non-destructive** - Moves duplicates to `/_dupes` folder instead of deleting them
|
|
59
|
+
- **Preserves structure** - Original folder hierarchy is maintained under the dupes folder
|
|
60
|
+
- **Resumable sessions** - Decisions auto-save and persist across sessions
|
|
61
|
+
- **Flexible filtering** - Scan specific paths and exclude folders from analysis
|
|
62
|
+
- **Multi-account profiles** - Target different Google Drive accounts with named profiles
|
|
63
|
+
|
|
64
|
+
## Quick Start
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Install with uv
|
|
68
|
+
uv sync
|
|
69
|
+
|
|
70
|
+
# Launch the web UI
|
|
71
|
+
uv run main.py
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
**First run:** A browser window will open for Google OAuth authentication. Grant access to your Google Drive.
|
|
75
|
+
|
|
76
|
+
## Installation
|
|
77
|
+
|
|
78
|
+
### Prerequisites
|
|
79
|
+
|
|
80
|
+
- Python 3.10+
|
|
81
|
+
- [uv](https://docs.astral.sh/uv/) package manager
|
|
82
|
+
- Google Cloud OAuth credentials ([setup guide](#google-cloud-setup))
|
|
83
|
+
|
|
84
|
+
### Google Cloud Setup
|
|
85
|
+
|
|
86
|
+
1. Go to [Google Cloud Console](https://console.cloud.google.com/apis/credentials)
|
|
87
|
+
2. Create a project (or select existing)
|
|
88
|
+
3. Enable the **Google Drive API**
|
|
89
|
+
4. Create **OAuth 2.0 Client ID** (choose "Desktop app")
|
|
90
|
+
5. Download the JSON file and save it as `credentials.json` in the project root
|
|
91
|
+
|
|
92
|
+
## Usage
|
|
93
|
+
|
|
94
|
+
### CLI Tool
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# Scan entire drive
|
|
98
|
+
uv run main.py
|
|
99
|
+
|
|
100
|
+
# Scan specific folder
|
|
101
|
+
uv run main.py --path "/Photos"
|
|
102
|
+
|
|
103
|
+
# Exclude folders
|
|
104
|
+
uv run main.py --exclude "/Backup/Old" --exclude "/tmp"
|
|
105
|
+
|
|
106
|
+
# Custom output location
|
|
107
|
+
uv run main.py --output results.csv
|
|
108
|
+
|
|
109
|
+
# Validate credentials
|
|
110
|
+
uv run main.py --validate
|
|
111
|
+
|
|
112
|
+
# Debug logging
|
|
113
|
+
uv run main.py --verbose --log-file debug.log
|
|
114
|
+
|
|
115
|
+
# Profiles (multiple Google accounts)
|
|
116
|
+
uv run main.py --init-profile work
|
|
117
|
+
uv run main.py --list-profiles
|
|
118
|
+
uv run main.py --profile work
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Web UI
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
uv run main.py
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
The web interface provides three tabs:
|
|
128
|
+
|
|
129
|
+
| Tab | Purpose |
|
|
130
|
+
|-----|---------|
|
|
131
|
+
| **Scan** | Run scans with path filtering and progress feedback |
|
|
132
|
+
| **Review** | Side-by-side comparison with file previews, make keep/skip decisions |
|
|
133
|
+
| **Export** | Preview moves (dry run), execute moves, export decisions to JSON |
|
|
134
|
+
|
|
135
|
+
**Note:** PDF preview requires poppler: `brew install poppler` (macOS)
|
|
136
|
+
|
|
137
|
+
### Moving Duplicates
|
|
138
|
+
|
|
139
|
+
Instead of being deleted, duplicates are moved to `/_dupes` at the Drive root:
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
/Photos/2024/IMG.jpg → /_dupes/Photos/2024/IMG.jpg
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
1. **Scan** - Find duplicates
|
|
146
|
+
2. **Review** - Mark which files to keep
|
|
147
|
+
3. **Preview** - Dry run to see what would move
|
|
148
|
+
4. **Execute** - Move duplicates to `/_dupes`
|
|
149
|
+
|
|
150
|
+
## Configuration
|
|
151
|
+
|
|
152
|
+
Settings can be configured via CLI arguments, a profile's `config.yaml`, environment variables, or `config.json`.
|
|
153
|
+
|
|
154
|
+
**Precedence:** CLI > Profile config.yaml > Environment > Config file > Defaults
|
|
155
|
+
|
|
156
|
+
### Environment Variables
|
|
157
|
+
|
|
158
|
+
| Variable | Default | Description |
|
|
159
|
+
|----------|---------|-------------|
|
|
160
|
+
| `GDRIVE_CREDENTIALS_PATH` | `credentials.json` | OAuth credentials file |
|
|
161
|
+
| `GDRIVE_TOKEN_PATH` | (next to credentials) | OAuth token file |
|
|
162
|
+
| `GDRIVE_OUTPUT_DIR` | `.output` | Output directory |
|
|
163
|
+
| `GDRIVE_DUPES_FOLDER` | `/_dupes` | Folder for duplicates |
|
|
164
|
+
| `GDRIVE_BATCH_SIZE` | `100` | Batch size for API operations |
|
|
165
|
+
| `GDRIVE_MAX_PREVIEW_MB` | `10` | Max file size for previews |
|
|
166
|
+
| `GDRIVE_EXCLUDE_PATHS` | (none) | Comma-separated paths to exclude |
|
|
167
|
+
|
|
168
|
+
### Config File
|
|
169
|
+
|
|
170
|
+
Create `config.json` in the project root:
|
|
171
|
+
|
|
172
|
+
```json
|
|
173
|
+
{
|
|
174
|
+
"credentials_path": "~/.config/dedrive/credentials.json",
|
|
175
|
+
"output_dir": "~/.local/share/dedrive",
|
|
176
|
+
"dupes_folder": "/_dupes",
|
|
177
|
+
"batch_size": 100,
|
|
178
|
+
"exclude_paths": ["/Backup/Old", "/tmp"]
|
|
179
|
+
}
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Profiles
|
|
183
|
+
|
|
184
|
+
Use profiles to manage multiple Google Drive accounts:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
# Create a profile
|
|
188
|
+
uv run main.py --init-profile work
|
|
189
|
+
|
|
190
|
+
# Copy credentials into the profile
|
|
191
|
+
cp ~/Downloads/credentials.json profiles/work/
|
|
192
|
+
|
|
193
|
+
# Use the profile
|
|
194
|
+
uv run main.py --profile work
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
Each profile stores its own `credentials.json`, `token.json`, `config.yaml`, and `.output/` under `profiles/<name>/`.
|
|
198
|
+
|
|
199
|
+
## Output Files
|
|
200
|
+
|
|
201
|
+
| File | Description |
|
|
202
|
+
|------|-------------|
|
|
203
|
+
| `.output/duplicates.csv` | Scan results with duplicate pairs |
|
|
204
|
+
| `.output/decisions.json` | User decisions (auto-saved) |
|
|
205
|
+
| `.output/execution_log.json` | Move operation results |
|
|
206
|
+
| `.output/scan_results.json` | Cached scan data for session resume |
|
|
207
|
+
|
|
208
|
+
## How It Works
|
|
209
|
+
|
|
210
|
+
1. **OAuth authentication** - Cached in `token.json` after first login
|
|
211
|
+
2. **Single query** - Fetches all files with MD5 metadata using one paginated listing query instead of traversing folders recursively
|
|
212
|
+
3. **In-memory path resolution** - Builds paths from parent IDs with memoization
|
|
213
|
+
4. **MD5 grouping** - Groups files by checksum to identify duplicates
|
|
214
|
+
5. **Size validation** - Files with the same MD5 but different sizes are flagged as "uncertain"
|
|
215
|
+
|
|
216
|
+
**Note:** Google Workspace files (Docs, Sheets, Slides) are skipped as they don't have MD5 checksums.
|
|
217
|
+
|
|
218
|
+
## Re-authentication
|
|
219
|
+
|
|
220
|
+
If you previously used this tool with read-only access, delete `token.json` and re-authenticate to grant move permissions.
|
|
221
|
+
|
|
222
|
+
## License
|
|
223
|
+
|
|
224
|
+
MIT
|