devcull 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cull/__init__.py +1 -0
- cull/__main__.py +2 -0
- cull/cli.py +189 -0
- cull/scan.py +145 -0
- devcull-0.1.0.dist-info/METADATA +73 -0
- devcull-0.1.0.dist-info/RECORD +9 -0
- devcull-0.1.0.dist-info/WHEEL +5 -0
- devcull-0.1.0.dist-info/entry_points.txt +2 -0
- devcull-0.1.0.dist-info/top_level.txt +1 -0
cull/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version; cull/cli.py imports it for the --version option.
__version__ = "0.1.0"
|
cull/__main__.py
ADDED
cull/cli.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
import sys
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
from rich.table import Table
|
|
9
|
+
from rich import print as rprint
|
|
10
|
+
|
|
11
|
+
from cull import __version__
|
|
12
|
+
from cull.scan import scan, Hit
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Shared rich console used by the helpers and the command below for the
# scan status line, the results table and error output.
console = Console()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _fmt_size(n: int) -> str:
|
|
19
|
+
for unit in ("B", "KB", "MB", "GB"):
|
|
20
|
+
if n < 1024:
|
|
21
|
+
return f"{n:.1f} {unit}"
|
|
22
|
+
n /= 1024
|
|
23
|
+
return f"{n:.1f} TB"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _age_days(dt: datetime) -> int:
|
|
27
|
+
now = datetime.now(tz=timezone.utc)
|
|
28
|
+
return (now - dt).days
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _delete(h: Hit):
    """Remove the directory tree at ``h.path``.

    Returns True when the tree was removed. On any failure the error is
    printed in red and False is returned, so callers can carry on with the
    remaining entries.
    """
    try:
        shutil.rmtree(h.path)
    except Exception as e:
        # markup=False: the directory name and the error text (which usually
        # embeds the full path) are printed verbatim. Bracketed components
        # such as "[id]" would otherwise be parsed as rich markup tags and
        # either disappear from the message or raise a markup error.
        console.print(f" couldn't remove {h.path.name}: {e}", style="red", markup=False)
        return False
    return True
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _show_table(hits: list[Hit]):
    """Print *hits* as a numbered table of path, size and age.

    Rows are numbered from 1 in list order. The age column is derived from
    each hit's ``last_used`` and shown in days, months (30 days) or years
    (365 days).
    """
    # local import: only needed to show paths literally
    from rich.markup import escape

    t = Table(show_header=True, header_style="bold", box=None, pad_edge=False, show_edge=False)
    t.add_column("#", style="dim", width=4)
    t.add_column("path", no_wrap=False, max_width=60)
    t.add_column("size", justify="right", style="yellow")
    t.add_column("last commit", justify="right", style="cyan")

    for i, h in enumerate(hits, 1):
        age = _age_days(h.last_used)
        if age > 365:
            age_str = f"{age // 365}y ago"
        elif age > 30:
            age_str = f"{age // 30}mo ago"
        else:
            age_str = f"{age}d ago"

        t.add_row(
            str(i),
            # escaped so bracketed path components (e.g. "[id]") are shown
            # as-is instead of being interpreted as rich markup tags
            escape(str(h.path)),
            _fmt_size(h.size),
            age_str,
        )

    console.print(t)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@click.command()
@click.argument("path", default=".", type=click.Path(exists=True, file_okay=False))
@click.option("--older-than", default=90, metavar="DAYS",
              help="only show caches not touched in N days (default: 90)")
@click.option("--delete", is_flag=True, help="interactively delete found caches")
@click.option("--all", "delete_all", is_flag=True, help="delete everything without asking (use with care)")
@click.option("--dry-run", is_flag=True, help="show what would be deleted but don't touch anything")
@click.version_option(__version__, prog_name="cull")
def cli(path, older_than, delete, delete_all, dry_run):
    """Find and remove stale dev cache directories.

    Scans PATH (default: current directory) for node_modules, .venv,
    __pycache__, and similar directories that are safe to delete.
    """
    # NOTE: the docstring above is what click prints for --help, so
    # maintainer notes live in comments instead.
    #
    # Flow: scan -> filter by age -> show table -> then one of
    #   --dry-run      stop after the table
    #   (no flag)      stop after the table
    #   --all          one confirmation, then delete every listed dir
    #   --delete       interactive prompt, delete selected rows

    # local import: paths and user input must be shown literally, never
    # interpreted as rich markup
    from rich.markup import escape

    root = Path(path).resolve()

    with console.status(f"scanning [dim]{escape(str(root))}[/dim]...", spinner="dots") as status:
        def on_progress(p: Path):
            # trim the path so it doesn't wrap
            label = str(p)
            if len(label) > 60:
                label = "..." + label[-57:]
            status.update(f"scanning [dim]{escape(label)}[/dim]")

        hits = scan(root, progress_cb=on_progress)

    if not hits:
        rprint("[green]nothing found, you're clean[/green]")
        return

    # filter by age
    filtered = [h for h in hits if _age_days(h.last_used) >= older_than]

    if not filtered:
        rprint(f"[green]found {len(hits)} cache dirs but all were touched in the last {older_than} days[/green]")
        return

    total = sum(h.size for h in filtered)
    rprint(f"\nfound [bold]{len(filtered)}[/bold] stale cache dirs totaling [yellow bold]{_fmt_size(total)}[/yellow bold]\n")
    _show_table(filtered)

    if dry_run:
        rprint("\n[dim](dry run — nothing deleted)[/dim]")
        return

    if not delete and not delete_all:
        return

    print()

    if delete_all:
        # a single confirmation still guards the bulk delete
        if not click.confirm(f"delete all {len(filtered)} dirs ({_fmt_size(total)})?"):
            return
        removed = freed = 0
        for h in filtered:
            if _delete(h):
                freed += h.size
                removed += 1
        rprint(f"\n[green]removed {removed} dirs, freed {_fmt_size(freed)}[/green]")
        return

    # interactive mode
    rprint("[dim]enter numbers to delete (e.g. 1 3 5), 'a' for all, or q to quit[/dim]\n")
    while True:
        try:
            raw = input("> ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            break

        if raw in ("q", "quit", ""):
            break

        if raw == "a":
            # copy: `filtered` is rebound while the selection is processed
            chosen = list(filtered)
        else:
            indices = []
            valid = True
            for tok in raw.split():
                try:
                    n = int(tok)
                except ValueError:
                    # tok is raw user input; escape it so something like
                    # "[/x]" can't break the message rendering
                    rprint(f"[red]'{escape(tok)}' isn't a number[/red]")
                    valid = False
                    break
                if not 1 <= n <= len(filtered):
                    rprint(f"[red]{n} is out of range[/red]")
                    valid = False
                    break
                # ignore repeats ("1 1"): the same dir must not be counted
                # twice in the confirmation nor deleted twice
                if n - 1 not in indices:
                    indices.append(n - 1)
            if not valid:
                continue
            chosen = [filtered[i] for i in indices]

        if not chosen:
            continue

        sz = sum(h.size for h in chosen)
        if not click.confirm(f"delete {len(chosen)} dir(s) ({_fmt_size(sz)})?"):
            continue

        freed = 0
        for h in chosen:
            console.print(f" removing [dim]{escape(str(h.path))}[/dim]...", end=" ")
            if _delete(h):
                freed += h.size
                console.print("[green]done[/green]")
                # remove from list so 'a' doesn't re-select deleted entries
                filtered = [x for x in filtered if x.path != h.path]

        rprint(f"[green]freed {_fmt_size(freed)}[/green]")

        if not filtered:
            rprint("[green]nothing left[/green]")
            break

        # reprint table with updated indices
        print()
        _show_table(filtered)
        print()
|
cull/scan.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Directory names that scan() matches as build/cache output.
CACHE_DIRS = {
    # javascript / web tooling
    "node_modules",
    ".next",
    ".nuxt",
    ".svelte-kit",
    ".solid",
    ".parcel-cache",
    ".turbo",
    ".sass-cache",
    ".angular",
    # python
    ".venv",
    "venv",
    ".virtualenv",
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    ".tox",
    # generic build output
    "dist",
    "build",
    "out",
    # gradle
    ".gradle",
    ".gradle-cache",
}

# Names that only count as cache when a build file sits right next to them
# (a top-level "target" folder could be anything).
CONDITIONAL = {
    "target",  # rust/maven — check for Cargo.toml or pom.xml
}

# Files/directories whose presence marks a directory as a project root.
PROJECT_MARKERS = {
    "package.json",
    "Cargo.toml",
    "pom.xml",
    "pyproject.toml",
    "setup.py",
    "go.mod",
    "build.gradle",
    ".git",
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class Hit:
|
|
35
|
+
path: Path
|
|
36
|
+
size: int = 0 # bytes
|
|
37
|
+
last_used: datetime = field(default_factory=lambda: datetime.min.replace(tzinfo=timezone.utc))
|
|
38
|
+
project: Path | None = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _project_root(p: Path) -> Path | None:
    """Return the nearest ancestor of *p* that contains a project marker.

    The search starts at ``p.parent`` and inspects at most six directories,
    stopping early once the top of the path is reached. Returns None when
    no inspected directory holds any of PROJECT_MARKERS.
    """
    candidate = p.parent
    remaining = 6  # don't walk forever
    while remaining > 0:
        for marker in PROJECT_MARKERS:
            if (candidate / marker).exists():
                return candidate
        parent = candidate.parent
        if parent == candidate:
            # nothing above this directory left to inspect
            return None
        candidate = parent
        remaining -= 1
    return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _dir_size(p: Path) -> int:
|
|
54
|
+
total = 0
|
|
55
|
+
# os.scandir is faster than os.walk for shallow trees,
|
|
56
|
+
# but node_modules can be 8 levels deep so we walk
|
|
57
|
+
try:
|
|
58
|
+
for entry in os.scandir(p):
|
|
59
|
+
if entry.is_symlink():
|
|
60
|
+
continue
|
|
61
|
+
if entry.is_dir(follow_symlinks=False):
|
|
62
|
+
total += _dir_size(Path(entry.path))
|
|
63
|
+
else:
|
|
64
|
+
try:
|
|
65
|
+
total += entry.stat(follow_symlinks=False).st_size
|
|
66
|
+
except OSError:
|
|
67
|
+
pass
|
|
68
|
+
except PermissionError:
|
|
69
|
+
pass
|
|
70
|
+
return total
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _last_git_commit(project: Path) -> datetime | None:
|
|
74
|
+
# git log is slow but mtime lies on windows (copying a file updates mtime)
|
|
75
|
+
try:
|
|
76
|
+
r = subprocess.run(
|
|
77
|
+
["git", "-C", str(project), "log", "-1", "--format=%ct"],
|
|
78
|
+
capture_output=True, text=True, timeout=5,
|
|
79
|
+
)
|
|
80
|
+
if r.returncode == 0 and r.stdout.strip():
|
|
81
|
+
ts = int(r.stdout.strip())
|
|
82
|
+
return datetime.fromtimestamp(ts, tz=timezone.utc)
|
|
83
|
+
except Exception:
|
|
84
|
+
pass
|
|
85
|
+
return None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _last_used(hit_path: Path, project: Path | None) -> datetime:
|
|
89
|
+
if project:
|
|
90
|
+
t = _last_git_commit(project)
|
|
91
|
+
if t:
|
|
92
|
+
return t
|
|
93
|
+
# fall back to the mtime of the cache dir itself
|
|
94
|
+
try:
|
|
95
|
+
st = hit_path.stat()
|
|
96
|
+
return datetime.fromtimestamp(st.st_mtime, tz=timezone.utc)
|
|
97
|
+
except OSError:
|
|
98
|
+
return datetime.min.replace(tzinfo=timezone.utc)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def scan(root: Path, progress_cb=None) -> list[Hit]:
    """
    Walk root looking for cache directories. Doesn't recurse into found dirs.
    progress_cb(path) is called as we enter each directory, for spinner updates.

    Matching rules:
    - names in CACHE_DIRS are reported, except that the generic names
      "dist", "build" and "out" are only reported when a project marker is
      found near them (otherwise they are walked like any other directory);
    - names in CONDITIONAL are reported only when Cargo.toml or pom.xml sits
      in the same parent directory;
    - ".git" and every other directory starting with ".git" is skipped
      entirely and never reported.

    Sizes and last-used times are filled in after the walk. Returns the hits
    in the order they were found.
    """
    # generic names: too risky to treat as cache outside of a project
    project_only = {"dist", "build", "out"}

    hits = []
    # TODO: handle junction points (windows symlink variant)

    for dirpath, dirnames, _ in os.walk(root, topdown=True, onerror=None, followlinks=False):
        p = Path(dirpath)

        if progress_cb:
            progress_cb(p)

        # directories os.walk should still descend into
        keep = []
        for d in dirnames:
            # never descend into git metadata: a branch called "build/x"
            # creates .git/refs/heads/build/, which must not be offered for
            # deletion
            if d.startswith(".git"):
                continue

            if d in CACHE_DIRS:
                full = p / d
                proj = _project_root(full)
                if d not in project_only or proj is not None:
                    hits.append(Hit(path=full, project=proj))
                    continue  # reported; don't look inside it

            if d in CONDITIONAL:
                # only include 'target' if a build marker is in the immediate parent
                if (p / "Cargo.toml").exists() or (p / "pom.xml").exists():
                    hits.append(Hit(path=p / d, project=p))
                    continue

            keep.append(d)

        # in-place update is what tells os.walk (topdown) what to skip
        dirnames[:] = keep

    # fill in sizes + last-used after the walk so the spinner can run cleanly
    for h in hits:
        h.size = _dir_size(h.path)
        h.last_used = _last_used(h.path, h.project)

    return hits
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: devcull
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: remove stale dev cache directories
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Project-URL: Repository, https://github.com/gnomecromancer/cull
|
|
7
|
+
Keywords: developer-tools,cache,cleanup,node_modules
|
|
8
|
+
Classifier: Environment :: Console
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Utilities
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: click>=8.0
|
|
15
|
+
Requires-Dist: rich>=13.0
|
|
16
|
+
|
|
17
|
+
# cull
|
|
18
|
+
|
|
19
|
+
I ran WinDirStat one afternoon because my SSD was at 94% and found 74 GB of `node_modules` from projects I hadn't touched in over a year. Deleting them by hand is tedious. This does it for you.
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
$ cull ~/projects
|
|
23
|
+
found 23 stale cache dirs totaling 61.3 GB
|
|
24
|
+
|
|
25
|
+
# path size last commit
|
|
26
|
+
1 ~/projects/old-saas/node_modules 18.2 GB 14mo ago
|
|
27
|
+
2 ~/projects/prototype-v2/node_modules 9.4 GB 11mo ago
|
|
28
|
+
3 ~/projects/prototype-v2/.next 2.1 GB 11mo ago
|
|
29
|
+
...
|
|
30
|
+
|
|
31
|
+
> a
|
|
32
|
+
delete 23 dir(s) (61.3 GB)? [y/N]: y
|
|
33
|
+
freed 61.3 GB
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## install
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
pip install devcull
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## usage
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
cull [PATH] scan PATH (default: current dir)
|
|
46
|
+
--older-than DAYS only show caches untouched for N days (default: 90)
|
|
47
|
+
--delete interactively pick what to remove
|
|
48
|
+
--all delete everything found after a single confirmation prompt
|
|
49
|
+
--dry-run show what would go, don't touch anything
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## what it looks for
|
|
53
|
+
|
|
54
|
+
- `node_modules`, `.next`, `.nuxt`, `.svelte-kit`, `.parcel-cache`, `.turbo`
|
|
55
|
+
- `.venv`, `venv`, `.virtualenv`, `.tox`
|
|
56
|
+
- `__pycache__`, `.pytest_cache`, `.mypy_cache`, `.ruff_cache`
|
|
57
|
+
- `dist`, `build`, `out` (inside project directories)
|
|
58
|
+
- `.gradle`, `.angular`, `.sass-cache`
|
|
59
|
+
- `target` (only inside Rust or Maven projects)
|
|
60
|
+
|
|
61
|
+
## what it won't do
|
|
62
|
+
|
|
63
|
+
It won't scan for "large files" generically. It won't suggest you delete your Downloads folder or anything outside the above list. The whole point is that it only removes things you can safely recreate by running `npm install` or `pip install` again.
|
|
64
|
+
|
|
65
|
+
It also won't run automatically or add itself to your startup. You run it when you want to run it.
|
|
66
|
+
|
|
67
|
+
## "last commit" column
|
|
68
|
+
|
|
69
|
+
cull tries to find the last git commit in the parent project. If there's no git repo, it falls back to the directory's modification time. The commit date is more reliable — copying files around updates mtime but doesn't change when you actually worked on the project.
|
|
70
|
+
|
|
71
|
+
## license
|
|
72
|
+
|
|
73
|
+
MIT
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
cull/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
|
|
2
|
+
cull/__main__.py,sha256=tBonSz1D22vdWC05SzTjRxMyJGHE07Upl-Wsciig7Lo,31
|
|
3
|
+
cull/cli.py,sha256=0GNCS0HsCILZ3HFyPhWBdMireRqQUUrBynaI_dnsVJo,5786
|
|
4
|
+
cull/scan.py,sha256=S6eg4Zz6Yr2gSfKoi0rVeP1Ck-OrTtXF6_wpiwglgAI,4415
|
|
5
|
+
devcull-0.1.0.dist-info/METADATA,sha256=p6HRmTZkN1Uw7TdMXge4uxf6pwfFrTSZXGa5AlPYC1k,2624
|
|
6
|
+
devcull-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
devcull-0.1.0.dist-info/entry_points.txt,sha256=WW4h2nPFJjgc74nd6oPUaBYsf3HGfmkPa_IVil9gSSA,38
|
|
8
|
+
devcull-0.1.0.dist-info/top_level.txt,sha256=mnLdGAQAr6_XdZQHse8OrLmQM8PWEa20JMh5ZAeNrls,5
|
|
9
|
+
devcull-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cull
|