pystou 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pystou-0.1.0/.claude/settings.local.json +24 -0
- pystou-0.1.0/.github/workflows/ci.yml +47 -0
- pystou-0.1.0/.github/workflows/release.yml +28 -0
- pystou-0.1.0/.gitignore +121 -0
- pystou-0.1.0/LICENSE +7 -0
- pystou-0.1.0/Makefile +76 -0
- pystou-0.1.0/PKG-INFO +392 -0
- pystou-0.1.0/README.md +372 -0
- pystou-0.1.0/cleanup/__init__.py +0 -0
- pystou-0.1.0/cleanup/main.py +310 -0
- pystou-0.1.0/common/__init__.py +0 -0
- pystou-0.1.0/common/cli.py +37 -0
- pystou-0.1.0/common/cursor.py +98 -0
- pystou-0.1.0/common/errors.py +9 -0
- pystou-0.1.0/common/fs_walker.py +178 -0
- pystou-0.1.0/common/indexer.py +178 -0
- pystou-0.1.0/common/interrupt.py +26 -0
- pystou-0.1.0/common/logger.py +67 -0
- pystou-0.1.0/common/safe_extract.py +100 -0
- pystou-0.1.0/common/safe_ops.py +47 -0
- pystou-0.1.0/common/utils.py +559 -0
- pystou-0.1.0/common/validation.py +46 -0
- pystou-0.1.0/dedup_folders/__init__.py +0 -0
- pystou-0.1.0/dedup_folders/main.py +346 -0
- pystou-0.1.0/empty/__init__.py +1 -0
- pystou-0.1.0/empty/main.py +308 -0
- pystou-0.1.0/extract/__init__.py +0 -0
- pystou-0.1.0/extract/main.py +394 -0
- pystou-0.1.0/identify/__init__.py +1 -0
- pystou-0.1.0/identify/main.py +401 -0
- pystou-0.1.0/pyproject.toml +77 -0
- pystou-0.1.0/pystou/__init__.py +3 -0
- pystou-0.1.0/pystou/main.py +129 -0
- pystou-0.1.0/run/.gitignore +2 -0
- pystou-0.1.0/stats/__init__.py +1 -0
- pystou-0.1.0/stats/main.py +327 -0
- pystou-0.1.0/tests/__init__.py +0 -0
- pystou-0.1.0/tests/test_cleanup.py +300 -0
- pystou-0.1.0/tests/test_cursor.py +181 -0
- pystou-0.1.0/tests/test_dedup_folders.py +128 -0
- pystou-0.1.0/tests/test_empty.py +375 -0
- pystou-0.1.0/tests/test_errors.py +20 -0
- pystou-0.1.0/tests/test_extract.py +317 -0
- pystou-0.1.0/tests/test_fs_walker.py +99 -0
- pystou-0.1.0/tests/test_identify.py +356 -0
- pystou-0.1.0/tests/test_indexer.py +108 -0
- pystou-0.1.0/tests/test_interrupt.py +28 -0
- pystou-0.1.0/tests/test_logger.py +40 -0
- pystou-0.1.0/tests/test_main_boundary.py +48 -0
- pystou-0.1.0/tests/test_safe_extract.py +131 -0
- pystou-0.1.0/tests/test_safe_ops.py +41 -0
- pystou-0.1.0/tests/test_stats.py +324 -0
- pystou-0.1.0/tests/test_utils_extract.py +107 -0
- pystou-0.1.0/tests/test_validation.py +48 -0
- pystou-0.1.0/uv.lock +766 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(make test:*)",
|
|
5
|
+
"Bash(python3:*)",
|
|
6
|
+
"Bash(ruff check:*)",
|
|
7
|
+
"Bash(git add:*)",
|
|
8
|
+
"Bash(git commit:*)",
|
|
9
|
+
"Bash(git stash:*)",
|
|
10
|
+
"Bash(pip install:*)",
|
|
11
|
+
"Bash(pystou --help:*)",
|
|
12
|
+
"Bash(pystou dedup --help:*)",
|
|
13
|
+
"Bash(pystou unarchive --help:*)",
|
|
14
|
+
"Bash(pip uninstall:*)",
|
|
15
|
+
"Bash(pystou unarchive:*)",
|
|
16
|
+
"Bash(PYTHONPATH=/home/dev/Repositories/pystou python3:*)",
|
|
17
|
+
"Bash(git reset:*)",
|
|
18
|
+
"Bash(git mv:*)",
|
|
19
|
+
"Bash(git rm:*)",
|
|
20
|
+
"Bash(pystou cleanup --help:*)",
|
|
21
|
+
"Bash(python -m pytest:*)"
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ['*']
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: ['*']
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: '3.12'
|
|
19
|
+
|
|
20
|
+
- name: Install ruff
|
|
21
|
+
run: pip install ruff
|
|
22
|
+
|
|
23
|
+
- name: Lint
|
|
24
|
+
run: ruff check cleanup common dedup_folders empty extract identify pystou stats tests
|
|
25
|
+
|
|
26
|
+
- name: Format check
|
|
27
|
+
run: ruff format --check cleanup common dedup_folders empty extract identify pystou stats tests
|
|
28
|
+
|
|
29
|
+
test:
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
strategy:
|
|
32
|
+
matrix:
|
|
33
|
+
python-version: ['3.9', '3.10', '3.11', '3.12']
|
|
34
|
+
|
|
35
|
+
steps:
|
|
36
|
+
- uses: actions/checkout@v4
|
|
37
|
+
|
|
38
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
39
|
+
uses: actions/setup-python@v5
|
|
40
|
+
with:
|
|
41
|
+
python-version: ${{ matrix.python-version }}
|
|
42
|
+
|
|
43
|
+
- name: Install package
|
|
44
|
+
run: pip install -e ".[dev]"
|
|
45
|
+
|
|
46
|
+
- name: Run tests
|
|
47
|
+
run: python -m pytest tests -v
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
concurrency:
|
|
8
|
+
group: release
|
|
9
|
+
cancel-in-progress: false
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
release:
|
|
13
|
+
# Only run on the canonical repo, never on forks.
|
|
14
|
+
if: github.repository == 'ICIJ/pystou'
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
permissions:
|
|
17
|
+
contents: write
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
with:
|
|
21
|
+
fetch-depth: 0
|
|
22
|
+
token: ${{ secrets.GITHUB_TOKEN }}
|
|
23
|
+
|
|
24
|
+
- name: Python Semantic Release
|
|
25
|
+
id: release
|
|
26
|
+
uses: python-semantic-release/python-semantic-release@v9
|
|
27
|
+
with:
|
|
28
|
+
github_token: ${{ secrets.GITHUB_TOKEN }}
|
pystou-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
*.egg-info/
|
|
24
|
+
.installed.cfg
|
|
25
|
+
*.egg
|
|
26
|
+
|
|
27
|
+
# PyInstaller
|
|
28
|
+
# Usually these files are written by a python script from a template
|
|
29
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
30
|
+
*.manifest
|
|
31
|
+
*.spec
|
|
32
|
+
|
|
33
|
+
# Installer logs
|
|
34
|
+
pip-log.txt
|
|
35
|
+
pip-delete-this-directory.txt
|
|
36
|
+
|
|
37
|
+
# Unit test / coverage reports
|
|
38
|
+
htmlcov/
|
|
39
|
+
.tox/
|
|
40
|
+
.nox/
|
|
41
|
+
.coverage
|
|
42
|
+
.coverage.*
|
|
43
|
+
.cache
|
|
44
|
+
nosetests.xml
|
|
45
|
+
coverage.xml
|
|
46
|
+
*.cover
|
|
47
|
+
*.py,cover
|
|
48
|
+
.hypothesis/
|
|
49
|
+
.ruff_cache
|
|
50
|
+
|
|
51
|
+
# Translations
|
|
52
|
+
*.mo
|
|
53
|
+
*.pot
|
|
54
|
+
|
|
55
|
+
# Django stuff:
|
|
56
|
+
*.log
|
|
57
|
+
local_settings.py
|
|
58
|
+
db.sqlite3
|
|
59
|
+
|
|
60
|
+
# Flask stuff:
|
|
61
|
+
instance/
|
|
62
|
+
.webassets-cache
|
|
63
|
+
|
|
64
|
+
# Scrapy stuff:
|
|
65
|
+
.scrapy
|
|
66
|
+
|
|
67
|
+
# Sphinx documentation
|
|
68
|
+
docs/_build/
|
|
69
|
+
|
|
70
|
+
# PyBuilder
|
|
71
|
+
target/
|
|
72
|
+
|
|
73
|
+
# Jupyter Notebook
|
|
74
|
+
.ipynb_checkpoints
|
|
75
|
+
|
|
76
|
+
# IPython
|
|
77
|
+
profile_default/
|
|
78
|
+
ipython_config.py
|
|
79
|
+
|
|
80
|
+
# pyenv
|
|
81
|
+
.python-version
|
|
82
|
+
|
|
83
|
+
# celery beat schedule file
|
|
84
|
+
celerybeat-schedule
|
|
85
|
+
|
|
86
|
+
# SageMath parsed files
|
|
87
|
+
*.sage.py
|
|
88
|
+
|
|
89
|
+
# Environments
|
|
90
|
+
.env
|
|
91
|
+
.venv
|
|
92
|
+
env/
|
|
93
|
+
venv/
|
|
94
|
+
ENV/
|
|
95
|
+
env.bak/
|
|
96
|
+
venv.bak/
|
|
97
|
+
|
|
98
|
+
# Spyder project settings
|
|
99
|
+
.spyderproject
|
|
100
|
+
.spyderworkspace
|
|
101
|
+
|
|
102
|
+
# Rope project settings
|
|
103
|
+
.ropeproject
|
|
104
|
+
|
|
105
|
+
# mkdocs documentation
|
|
106
|
+
/site
|
|
107
|
+
|
|
108
|
+
# mypy
|
|
109
|
+
.mypy_cache/
|
|
110
|
+
.dmypy.json
|
|
111
|
+
dmypy.json
|
|
112
|
+
|
|
113
|
+
# Pyre type checker
|
|
114
|
+
.pyre/
|
|
115
|
+
|
|
116
|
+
# SQLite3 database files
|
|
117
|
+
*.sqlite3
|
|
118
|
+
*.db
|
|
119
|
+
|
|
120
|
+
# Superpowers (local design specs/plans, not tracked)
|
|
121
|
+
docs/superpowers/
|
pystou-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright 2024 International Consortium of Investigative Journalists
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
pystou-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
.PHONY: help test coverage lint format typecheck clean install build publish release-dry _check-uv
|
|
2
|
+
|
|
3
|
+
SRC := cleanup common dedup_folders empty extract identify pystou stats
|
|
4
|
+
|
|
5
|
+
help:
|
|
6
|
+
@echo "pystou - Python scripts for deduplicating folders and unarchiving files"
|
|
7
|
+
@echo ""
|
|
8
|
+
@echo "Usage:"
|
|
9
|
+
@echo " make install Sync the uv environment with dev extras"
|
|
10
|
+
@echo " pystou dedup DIR Find and deduplicate duplicate folders"
|
|
11
|
+
@echo " pystou extract DIR Extract archive files"
|
|
12
|
+
@echo " pystou cleanup DIR Remove junk files (.DS_Store, Thumbs.db, ...)"
|
|
13
|
+
@echo " pystou identify DIR Identify file types and detect issues"
|
|
14
|
+
@echo " pystou stats DIR Show directory statistics"
|
|
15
|
+
@echo " pystou empty DIR Find and remove empty directories"
|
|
16
|
+
@echo ""
|
|
17
|
+
@echo "Development:"
|
|
18
|
+
@echo " make test Run the test suite"
|
|
19
|
+
@echo " make coverage Run tests with coverage report"
|
|
20
|
+
@echo " make lint Check code style"
|
|
21
|
+
@echo " make format Auto-format code"
|
|
22
|
+
@echo " make typecheck Run mypy"
|
|
23
|
+
@echo " make clean Remove cache and build files"
|
|
24
|
+
@echo ""
|
|
25
|
+
@echo "Release:"
|
|
26
|
+
@echo " make build Build sdist and wheel into dist/"
|
|
27
|
+
@echo " make publish Build and upload to PyPI (needs a token)"
|
|
28
|
+
@echo " make release-dry Preview the next semantic-release version"
|
|
29
|
+
|
|
30
|
+
_check-uv:
|
|
31
|
+
@command -v uv >/dev/null 2>&1 || { \
|
|
32
|
+
echo "Error: uv is not installed"; \
|
|
33
|
+
echo ""; \
|
|
34
|
+
echo "Install it from https://docs.astral.sh/uv/getting-started/installation/"; \
|
|
35
|
+
exit 1; \
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
test: _check-uv
|
|
39
|
+
@uv run --extra dev pytest tests -v
|
|
40
|
+
|
|
41
|
+
coverage: _check-uv
|
|
42
|
+
@uv run --extra dev pytest --cov=. --cov-report=term-missing tests
|
|
43
|
+
|
|
44
|
+
lint: _check-uv
|
|
45
|
+
@uv run --extra dev ruff check $(SRC) tests && uv run --extra dev ruff format --check $(SRC) tests && echo "Lint OK"
|
|
46
|
+
|
|
47
|
+
format: _check-uv
|
|
48
|
+
@uv run --extra dev ruff check --fix $(SRC) tests && uv run --extra dev ruff format $(SRC) tests && echo "Format OK"
|
|
49
|
+
|
|
50
|
+
typecheck: _check-uv
|
|
51
|
+
@uv run --extra dev mypy $(SRC) && echo "Typecheck OK"
|
|
52
|
+
|
|
53
|
+
clean:
|
|
54
|
+
@rm -rf .pytest_cache .mypy_cache .ruff_cache .coverage htmlcov
|
|
55
|
+
@rm -rf dist build pystou.egg-info
|
|
56
|
+
@find . -type d -name "__pycache__" -prune -exec rm -rf {} +
|
|
57
|
+
@find . -type f -name "*.pyc" -delete
|
|
58
|
+
@echo "Cleaned"
|
|
59
|
+
|
|
60
|
+
install: _check-uv
|
|
61
|
+
@uv sync --extra dev
|
|
62
|
+
@echo "Synced pystou environment with dev extras"
|
|
63
|
+
|
|
64
|
+
build: _check-uv
|
|
65
|
+
@rm -rf dist build pystou.egg-info
|
|
66
|
+
@uv build
|
|
67
|
+
@echo ""
|
|
68
|
+
@echo "Built artifacts:"
|
|
69
|
+
@ls -1 dist
|
|
70
|
+
|
|
71
|
+
publish: build
|
|
72
|
+
@uv publish
|
|
73
|
+
@echo "Published to PyPI"
|
|
74
|
+
|
|
75
|
+
release-dry: _check-uv
|
|
76
|
+
@uvx --from python-semantic-release semantic-release version --print
|
pystou-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pystou
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python scripts for deduplicating folders and unarchiving files.
|
|
5
|
+
Project-URL: Homepage, https://github.com/ICIJ/pystou
|
|
6
|
+
Project-URL: Repository, https://github.com/ICIJ/pystou
|
|
7
|
+
Author: ICIJ
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: archive,cli,deduplication,filesystem
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
16
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# PyStou
|
|
22
|
+
|
|
23
|
+
Welcome to **PyStou** – your ultimate toolkit for keeping your filesystem tidy and organized! Whether you're a developer drowning in duplicate folders or someone who loves archiving files but hates the clutter, PyStou is here to rescue you from chaos with style and efficiency.
|
|
24
|
+
|
|
25
|
+
**PyStou** is proudly developed by the [International Consortium of Investigative Journalists (ICIJ)](https://www.icij.org/), aiming to empower users with tools to manage and maintain large amounts of files.
|
|
26
|
+
|
|
27
|
+
## Table of Contents
|
|
28
|
+
|
|
29
|
+
- [Features](#features)
|
|
30
|
+
- [Installation](#installation)
|
|
31
|
+
- [Prerequisites](#prerequisites)
|
|
32
|
+
- [Clone the Repository](#clone-the-repository)
|
|
33
|
+
- [Install the Package](#install-the-package)
|
|
34
|
+
- [Usage](#usage)
|
|
35
|
+
- [Deduplicate Folders](#deduplicate-folders)
|
|
36
|
+
- [Extract Archives](#extract-archives)
|
|
37
|
+
- [Cleanup Junk Files](#cleanup-junk-files)
|
|
38
|
+
- [Identify File Types](#identify-file-types)
|
|
39
|
+
- [Directory Statistics](#directory-statistics)
|
|
40
|
+
- [Empty Directories](#empty-directories)
|
|
41
|
+
- [Running Tests](#running-tests)
|
|
42
|
+
- [License](#license)
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- Automatically identify and manage duplicate directories, ensuring you only keep what you need.
|
|
47
|
+
- Effortlessly extract a wide range of archive formats, including `.zip`, `.tar.gz`, `.zst`, and `.pst`.
|
|
48
|
+
- Support for split ZIP archives (`.z01`, `.z02`, etc.) with automatic detection.
|
|
49
|
+
- Nested archive extraction for archives containing other archives.
|
|
50
|
+
- Parallel archive extraction for faster processing of multiple archives.
|
|
51
|
+
- Remove junk files (`.DS_Store`, `Thumbs.db`, `__MACOSX`, etc.) with a single command.
|
|
52
|
+
- Detect file type mismatches and encrypted archives.
|
|
53
|
+
- Get comprehensive directory statistics including file counts, sizes, and types.
|
|
54
|
+
- Find and remove empty directories safely.
|
|
55
|
+
- Choose to interact with each file/archive or set default actions for seamless automation.
|
|
56
|
+
- Keep track of all actions with detailed JSON-formatted logs for easy troubleshooting.
|
|
57
|
+
- Pure native Python scripts ready to run out-of-the-box (except for necessary command-line tools).
|
|
58
|
+
|
|
59
|
+
## Installation
|
|
60
|
+
|
|
61
|
+
Getting started with PyStou is a breeze! Follow the steps below to install and set up the project on your machine.
|
|
62
|
+
|
|
63
|
+
### Prerequisites
|
|
64
|
+
|
|
65
|
+
- **Python 3.9 or higher** is required.
|
|
66
|
+
- **Command-Line Tools:**
|
|
67
|
+
- **`p7zip-full`**: Required for extracting split ZIP archives (`.z01`, `.z02`, etc.).
|
|
68
|
+
- **`pst-utils`**: Required for extracting `.pst` files.
|
|
69
|
+
- **`zstd`**: Required for handling `.zst` files.
|
|
70
|
+
|
|
71
|
+
### Clone the Repository
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
git clone https://github.com/ICIJ/pystou.git
|
|
75
|
+
cd pystou
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Install the Package
|
|
79
|
+
|
|
80
|
+
PyStou can be installed using `pip`. It has no additional Python dependencies; the command-line tools listed under Prerequisites must be installed separately.
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
pip install .
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
> **Note:** You might need to use `pip3` and/or `sudo` depending on your system configuration.
|
|
87
|
+
|
|
88
|
+
## Usage
|
|
89
|
+
|
|
90
|
+
PyStou provides a unified command-line interface with several subcommands.
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
pystou --help
|
|
94
|
+
pystou dedup --help
|
|
95
|
+
pystou extract --help
|
|
96
|
+
pystou cleanup --help
|
|
97
|
+
pystou identify --help
|
|
98
|
+
pystou stats --help
|
|
99
|
+
pystou empty --help
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Deduplicate Folders
|
|
103
|
+
|
|
104
|
+
**Purpose:** Identify and manage duplicate directories to keep your filesystem clean.
|
|
105
|
+
|
|
106
|
+
**Command:**
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
pystou dedup [directory] [options]
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
**Parameters:**
|
|
113
|
+
|
|
114
|
+
- `directory`: (Optional) The root directory to start scanning from. Defaults to the current directory if not specified.
|
|
115
|
+
|
|
116
|
+
**Options:**
|
|
117
|
+
|
|
118
|
+
- `-r`, `--recursive`: Recursively process subdirectories.
|
|
119
|
+
- `-l LEVEL`, `--level LEVEL`: Maximum depth level for recursion (default: unlimited).
|
|
120
|
+
- `-c CHOICE`, `--default-choice CHOICE`: Default action to apply to all duplicate groups.
|
|
121
|
+
- `1`: Delete duplicates.
|
|
122
|
+
- `2`: Merge contents and delete duplicates.
|
|
123
|
+
- `3`: Skip (do nothing).
|
|
124
|
+
- `-n`, `--dry-run`: Perform a dry run without making any changes.
|
|
125
|
+
- `--log-dir LOG_DIR`: Directory to store log files (default: current directory).
|
|
126
|
+
- `--db-dir DB_DIR`: Directory to store index database (default: current directory).
|
|
127
|
+
|
|
128
|
+
**Examples:**
|
|
129
|
+
|
|
130
|
+
- **Interactive Mode:**
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
pystou dedup /path/to/your/folders -r
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
*The script will prompt you for each duplicate group found.*
|
|
137
|
+
|
|
138
|
+
- **Automated Mode with Default Choice (Delete Duplicates):**
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
pystou dedup /path/to/your/folders -r -c 1
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
- **Dry Run Mode:**
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
pystou dedup /path/to/your/folders -r -n
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Extract Archives
|
|
151
|
+
|
|
152
|
+
**Purpose:** Extract various archive formats efficiently and manage them post-extraction.
|
|
153
|
+
|
|
154
|
+
**Supported Formats:**
|
|
155
|
+
- Standard: `.zip`, `.tar`, `.tar.gz`, `.tgz`, `.tar.bz2`, `.tbz`, `.gz`, `.bz2`
|
|
156
|
+
- Zstandard: `.zst`, `.tar.zst`, `.tzst`
|
|
157
|
+
- Outlook: `.pst`
|
|
158
|
+
- Split ZIP: `.z01`, `.z02`, ... (automatically detected with main `.zip` file)
|
|
159
|
+
|
|
160
|
+
**Command:**
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
pystou extract [directory] [options]
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Parameters:**
|
|
167
|
+
|
|
168
|
+
- `directory`: (Optional) The root directory to start searching for archives. Defaults to the current directory if not specified.
|
|
169
|
+
|
|
170
|
+
**Options:**
|
|
171
|
+
|
|
172
|
+
- `-r`, `--recursive`: Recursively search subdirectories for archives.
|
|
173
|
+
- `-c CHOICE`, `--default-choice CHOICE`: Default action to apply to all archives.
|
|
174
|
+
- `1`: Extract archives.
|
|
175
|
+
- `2`: Skip (do nothing).
|
|
176
|
+
- `-dc DELETE_CHOICE`, `--default-delete-choice DELETE_CHOICE`: Default action when prompted to delete archives after extraction.
|
|
177
|
+
- `1`: Delete the archive after extraction.
|
|
178
|
+
- `2`: Keep the archive after extraction.
|
|
179
|
+
- `-p N`, `--parallel N`: Number of parallel extraction workers (default: 1). Requires `-c` flag.
|
|
180
|
+
- `-N`, `--nested`: Recursively extract archives found inside extracted content.
|
|
181
|
+
- `--max-depth N`: Maximum nesting depth for `--nested` (default: 10).
|
|
182
|
+
- `-n`, `--dry-run`: Perform a dry run without making any changes.
|
|
183
|
+
- `--log-dir LOG_DIR`: Directory to store log files (default: current directory).
|
|
184
|
+
- `--db-dir DB_DIR`: Directory to store index database (default: current directory).
|
|
185
|
+
|
|
186
|
+
**Examples:**
|
|
187
|
+
|
|
188
|
+
- **Interactive Mode:**
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
pystou extract /path/to/archives -r
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
*The script will prompt you for each archive found, asking whether to extract or skip.*
|
|
195
|
+
|
|
196
|
+
- **Automated Mode with Default Choices (Extract and Delete Archives):**
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
pystou extract /path/to/archives -r -c 1 -dc 1
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
- **Parallel Extraction (4 workers):**
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
pystou extract /path/to/archives -r -c 1 -dc 2 -p 4
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
- **Nested Extraction (archives inside archives):**
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
pystou extract /path/to/archives -r -c 1 -dc 1 --nested
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
- **Dry Run Mode:**
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
pystou extract /path/to/archives -r -n
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Cleanup Junk Files
|
|
221
|
+
|
|
222
|
+
**Purpose:** Remove common junk files created by operating systems and applications.
|
|
223
|
+
|
|
224
|
+
**Removed by default:**
|
|
225
|
+
- macOS: `.DS_Store`, `._.DS_Store`, `._*` files, `__MACOSX`, `.AppleDouble`, `.Spotlight-V100`, `.Trashes`, `.fseventsd`, `.TemporaryItems`, `.LSOverride`
|
|
226
|
+
- Windows: `Thumbs.db`, `ehthumbs.db`, `ehthumbs_vista.db`, `desktop.ini`
|
|
227
|
+
|
|
228
|
+
**Command:**
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
pystou cleanup [directory] [options]
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
**Options:**
|
|
235
|
+
|
|
236
|
+
- `-r`, `--recursive`: Recursively process subdirectories.
|
|
237
|
+
- `--include PATTERN`: Additional file/directory names to remove (can be used multiple times).
|
|
238
|
+
- `--list-only`: Only list junk files without removing them.
|
|
239
|
+
- `-n`, `--dry-run`: Perform a dry run without making any changes.
|
|
240
|
+
|
|
241
|
+
**Examples:**
|
|
242
|
+
|
|
243
|
+
- **List junk files:**
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
pystou cleanup /path/to/folder -r --list-only
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
- **Remove junk files:**
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
pystou cleanup /path/to/folder -r
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
- **Remove additional patterns:**
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
pystou cleanup /path/to/folder -r --include ".gitkeep" --include "*.bak"
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Identify File Types
|
|
262
|
+
|
|
263
|
+
**Purpose:** Detect file types and find potential issues like mismatched extensions or encrypted archives.
|
|
264
|
+
|
|
265
|
+
**Command:**
|
|
266
|
+
|
|
267
|
+
```bash
|
|
268
|
+
pystou identify [directory] [options]
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
**Options:**
|
|
272
|
+
|
|
273
|
+
- `-r`, `--recursive`: Recursively process subdirectories.
|
|
274
|
+
- `--check-mismatch`: Check for files with mismatched extensions.
|
|
275
|
+
- `--check-encrypted`: Check for encrypted ZIP archives.
|
|
276
|
+
- `--check-all`: Run all checks.
|
|
277
|
+
- `--extensions EXT`: Comma-separated list of extensions to check (e.g., `.zip,.pdf`).
|
|
278
|
+
|
|
279
|
+
**Examples:**
|
|
280
|
+
|
|
281
|
+
- **Find mismatched extensions:**
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
pystou identify /path/to/folder -r --check-mismatch
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
- **Find encrypted archives:**
|
|
288
|
+
|
|
289
|
+
```bash
|
|
290
|
+
pystou identify /path/to/folder -r --check-encrypted
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
- **Run all checks on specific extensions:**
|
|
294
|
+
|
|
295
|
+
```bash
|
|
296
|
+
pystou identify /path/to/folder -r --check-all --extensions ".zip,.pdf,.docx"
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
### Directory Statistics
|
|
300
|
+
|
|
301
|
+
**Purpose:** Display comprehensive statistics about files and directories.
|
|
302
|
+
|
|
303
|
+
**Command:**
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
pystou stats [directory] [options]
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
**Options:**
|
|
310
|
+
|
|
311
|
+
- `-r`, `--recursive`: Recursively process subdirectories.
|
|
312
|
+
- `--top N`: Number of top items to show (default: 10).
|
|
313
|
+
- `--by-extension`: Show breakdown by file extension.
|
|
314
|
+
- `--by-size`: Show largest files.
|
|
315
|
+
- `--json`: Output statistics in JSON format.
|
|
316
|
+
|
|
317
|
+
**Examples:**
|
|
318
|
+
|
|
319
|
+
- **Show directory statistics:**
|
|
320
|
+
|
|
321
|
+
```bash
|
|
322
|
+
pystou stats /path/to/folder -r
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
- **Show largest files:**
|
|
326
|
+
|
|
327
|
+
```bash
|
|
328
|
+
pystou stats /path/to/folder -r --by-size --top 20
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
- **Output as JSON:**
|
|
332
|
+
|
|
333
|
+
```bash
|
|
334
|
+
pystou stats /path/to/folder -r --json
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
### Empty Directories
|
|
338
|
+
|
|
339
|
+
**Purpose:** Find and remove empty directories.
|
|
340
|
+
|
|
341
|
+
**Command:**
|
|
342
|
+
|
|
343
|
+
```bash
|
|
344
|
+
pystou empty [directory] [options]
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
**Options:**
|
|
348
|
+
|
|
349
|
+
- `-r`, `--recursive`: Recursively process subdirectories.
|
|
350
|
+
- `--list-only`: Only list empty directories without removing them.
|
|
351
|
+
- `--include-hidden`: Include hidden directories (starting with `.`).
|
|
352
|
+
- `-n`, `--dry-run`: Perform a dry run without making any changes.
|
|
353
|
+
|
|
354
|
+
**Examples:**
|
|
355
|
+
|
|
356
|
+
- **List empty directories:**
|
|
357
|
+
|
|
358
|
+
```bash
|
|
359
|
+
pystou empty /path/to/folder -r --list-only
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
- **Remove empty directories:**
|
|
363
|
+
|
|
364
|
+
```bash
|
|
365
|
+
pystou empty /path/to/folder -r
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
- **Include hidden directories:**
|
|
369
|
+
|
|
370
|
+
```bash
|
|
371
|
+
pystou empty /path/to/folder -r --include-hidden
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
## Running Tests
|
|
375
|
+
|
|
376
|
+
PyStou includes a suite of unit tests to ensure everything works smoothly. Here's how to run them:
|
|
377
|
+
|
|
378
|
+
```bash
|
|
379
|
+
make test
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
Or manually:
|
|
383
|
+
|
|
384
|
+
```bash
|
|
385
|
+
python3 -m pytest tests -v
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
> **Note:** Ensure you have all necessary command-line tools installed (`readpst`, `zstd`, `7z`) before running tests that involve archive extraction.
|
|
389
|
+
|
|
390
|
+
## License
|
|
391
|
+
|
|
392
|
+
Distributed under the [MIT License](LICENSE). See `LICENSE` for more information.
|