devcull 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cull/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
cull/__main__.py ADDED
@@ -0,0 +1,2 @@
1
+ from cull.cli import cli
2
+ cli()
cull/cli.py ADDED
@@ -0,0 +1,189 @@
1
+ import shutil
2
+ import sys
3
+ from datetime import datetime, timezone
4
+ from pathlib import Path
5
+
6
+ import click
7
+ from rich.console import Console
8
+ from rich.table import Table
9
+ from rich import print as rprint
10
+
11
+ from cull import __version__
12
+ from cull.scan import scan, Hit
13
+
14
+
15
+ console = Console()
16
+
17
+
18
+ def _fmt_size(n: int) -> str:
19
+ for unit in ("B", "KB", "MB", "GB"):
20
+ if n < 1024:
21
+ return f"{n:.1f} {unit}"
22
+ n /= 1024
23
+ return f"{n:.1f} TB"
24
+
25
+
26
+ def _age_days(dt: datetime) -> int:
27
+ now = datetime.now(tz=timezone.utc)
28
+ return (now - dt).days
29
+
30
+
31
def _delete(h: Hit):
    """Remove the directory tree at ``h.path``.

    Returns True when the tree was removed. On any failure the error is
    printed to the console and False is returned, so a batch delete can carry
    on with the next entry instead of aborting.
    """
    # rich is already a dependency of this module; imported locally so this
    # function is self-contained.
    from rich.markup import escape

    try:
        shutil.rmtree(h.path)
    except Exception as e:
        # Deliberately broad: deletion is best-effort and must never abort the
        # surrounding loop.
        # The exception text usually embeds the full path. Escape it so that
        # bracketed path segments (e.g. "[id]") are printed literally instead
        # of being parsed as Rich markup tags.
        console.print(f" [red]couldn't remove {escape(h.path.name)}: {escape(str(e))}[/red]")
        return False
    return True
38
+
39
+
40
def _show_table(hits: list[Hit]):
    """Print a numbered table of hits: index, path, size and age.

    The 1-based index in the first column is what the interactive prompt
    expects the user to type. Ages over 365 days are shown in years, ages
    over 30 days in months, everything else in days.
    """
    # rich is already a dependency of this module; imported locally so this
    # function is self-contained.
    from rich.markup import escape

    t = Table(show_header=True, header_style="bold", box=None, pad_edge=False, show_edge=False)
    t.add_column("#", style="dim", width=4)
    t.add_column("path", no_wrap=False, max_width=60)
    t.add_column("size", justify="right", style="yellow")
    t.add_column("last commit", justify="right", style="cyan")

    for i, h in enumerate(hits, 1):
        age = _age_days(h.last_used)
        if age > 365:
            age_str = f"{age // 365}y ago"
        elif age > 30:
            age_str = f"{age // 30}mo ago"
        else:
            age_str = f"{age}d ago"

        t.add_row(
            str(i),
            # String cells are parsed as Rich markup; escape the path so a
            # segment such as "[id]" is displayed instead of being swallowed
            # as a style tag.
            escape(str(h.path)),
            _fmt_size(h.size),
            age_str,
        )

    console.print(t)
64
+
65
+
66
@click.command()
@click.argument("path", default=".", type=click.Path(exists=True, file_okay=False))
@click.option("--older-than", default=90, metavar="DAYS",
              help="only show caches not touched in N days (default: 90)")
@click.option("--delete", is_flag=True, help="interactively delete found caches")
@click.option("--all", "delete_all", is_flag=True, help="delete everything without asking (use with care)")
@click.option("--dry-run", is_flag=True, help="show what would be deleted but don't touch anything")
@click.version_option(__version__, prog_name="cull")
def cli(path, older_than, delete, delete_all, dry_run):
    """Find and remove stale dev cache directories.

    Scans PATH (default: current directory) for node_modules, .venv,
    __pycache__, and similar directories that are safe to delete.
    """
    # The docstring above doubles as the command's --help text.
    #
    # Flow: scan -> filter by age -> print table -> then, depending on flags:
    #   --dry-run  stop after the table
    #   --all      one confirmation, then delete every listed directory
    #   --delete   interactive prompt: pick entries by number, 'a' or 'q'
    #   (none)     just list
    #
    # rich is already a dependency of this module; imported locally so this
    # function is self-contained.
    from rich.markup import escape

    root = Path(path).resolve()

    hits: list[Hit] = []

    # Paths are escaped before being placed inside markup: a directory named
    # e.g. "[id]" would otherwise be parsed as a Rich style tag.
    with console.status(f"scanning [dim]{escape(str(root))}[/dim]...", spinner="dots") as status:
        def on_progress(p: Path):
            # trim the path so it doesn't wrap
            label = str(p)
            if len(label) > 60:
                label = "..." + label[-57:]
            status.update(f"scanning [dim]{escape(label)}[/dim]")

        hits = scan(root, progress_cb=on_progress)

    if not hits:
        rprint("[green]nothing found, you're clean[/green]")
        return

    # filter by age
    filtered = [h for h in hits if _age_days(h.last_used) >= older_than]

    if not filtered:
        rprint(f"[green]found {len(hits)} cache dirs but all were touched in the last {older_than} days[/green]")
        return

    total = sum(h.size for h in filtered)
    rprint(f"\nfound [bold]{len(filtered)}[/bold] stale cache dirs totaling [yellow bold]{_fmt_size(total)}[/yellow bold]\n")
    _show_table(filtered)

    if dry_run:
        rprint("\n[dim](dry run — nothing deleted)[/dim]")
        return

    if not delete and not delete_all:
        return

    print()

    if delete_all:
        # a single confirmation covers the whole batch
        if not click.confirm(f"delete all {len(filtered)} dirs ({_fmt_size(total)})?"):
            return
        removed = freed = 0
        for h in filtered:
            if _delete(h):
                freed += h.size
                removed += 1
        rprint(f"\n[green]removed {removed} dirs, freed {_fmt_size(freed)}[/green]")
        return

    # interactive mode
    rprint("[dim]enter numbers to delete (e.g. 1 3 5), 'a' for all, or q to quit[/dim]\n")
    while True:
        try:
            raw = input("> ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            break

        if raw in ("q", "quit", ""):
            break

        if raw == "a":
            chosen = list(filtered)
        else:
            indices = []
            valid = True
            for tok in raw.split():
                try:
                    n = int(tok)
                except ValueError:
                    rprint(f"[red]'{tok}' isn't a number[/red]")
                    valid = False
                    break
                if not 1 <= n <= len(filtered):
                    rprint(f"[red]{n} is out of range[/red]")
                    valid = False
                    break
                # ignore repeats ("1 1"): otherwise the size is counted twice
                # and the second delete fails on an already-removed path
                if n - 1 not in indices:
                    indices.append(n - 1)
            if not valid:
                continue
            chosen = [filtered[i] for i in indices]

        if not chosen:
            continue

        sz = sum(h.size for h in chosen)
        if not click.confirm(f"delete {len(chosen)} dir(s) ({_fmt_size(sz)})?"):
            continue

        freed = 0
        for h in chosen:
            console.print(f" removing [dim]{escape(str(h.path))}[/dim]...", end=" ")
            if _delete(h):
                freed += h.size
                console.print("[green]done[/green]")
                # remove from list so 'a' doesn't re-select deleted entries
                filtered = [x for x in filtered if x.path != h.path]

        rprint(f"[green]freed {_fmt_size(freed)}[/green]")

        if not filtered:
            rprint("[green]nothing left[/green]")
            break

        # reprint table with updated indices
        print()
        _show_table(filtered)
        print()
cull/scan.py ADDED
@@ -0,0 +1,145 @@
1
+ import os
2
+ import subprocess
3
+ from dataclasses import dataclass, field
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+
7
+
8
# directories that are definitely just build/cache garbage
CACHE_DIRS = {
    # javascript / front-end tooling
    "node_modules",
    ".next",
    ".nuxt",
    ".svelte-kit",
    ".solid",
    ".parcel-cache",
    ".turbo",
    ".sass-cache",
    ".angular",
    # python environments and tool caches
    ".venv",
    "venv",
    ".virtualenv",
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    ".tox",
    # generic build output
    "dist",
    "build",
    "out",
    # gradle
    ".gradle",
    ".gradle-cache",
}

# only count these as cache if there's a project marker nearby
# (don't nuke a top-level "target" folder that could be anything)
CONDITIONAL = {
    "target",  # rust/maven — check for Cargo.toml or pom.xml
}

# files/directories whose presence marks a directory as a project root
PROJECT_MARKERS = {
    ".git",
    "Cargo.toml",
    "build.gradle",
    "go.mod",
    "package.json",
    "pom.xml",
    "pyproject.toml",
    "setup.py",
}
31
+
32
+
33
+ @dataclass
34
+ class Hit:
35
+ path: Path
36
+ size: int = 0 # bytes
37
+ last_used: datetime = field(default_factory=lambda: datetime.min.replace(tzinfo=timezone.utc))
38
+ project: Path | None = None
39
+
40
+
41
def _project_root(p: Path) -> Path | None:
    """Return the closest ancestor of *p* that contains a project marker.

    The search starts at the direct parent of *p* and looks at no more than
    six directories on the way up. Returns None when none of them holds a
    marker.
    """
    start = p.parent
    # the parent itself plus everything above it, capped so we don't walk forever
    candidates = [start, *start.parents][:6]
    for directory in candidates:
        for marker in PROJECT_MARKERS:
            if (directory / marker).exists():
                return directory
    return None
51
+
52
+
53
+ def _dir_size(p: Path) -> int:
54
+ total = 0
55
+ # os.scandir is faster than os.walk for shallow trees,
56
+ # but node_modules can be 8 levels deep so we walk
57
+ try:
58
+ for entry in os.scandir(p):
59
+ if entry.is_symlink():
60
+ continue
61
+ if entry.is_dir(follow_symlinks=False):
62
+ total += _dir_size(Path(entry.path))
63
+ else:
64
+ try:
65
+ total += entry.stat(follow_symlinks=False).st_size
66
+ except OSError:
67
+ pass
68
+ except PermissionError:
69
+ pass
70
+ return total
71
+
72
+
73
+ def _last_git_commit(project: Path) -> datetime | None:
74
+ # git log is slow but mtime lies on windows (copying a file updates mtime)
75
+ try:
76
+ r = subprocess.run(
77
+ ["git", "-C", str(project), "log", "-1", "--format=%ct"],
78
+ capture_output=True, text=True, timeout=5,
79
+ )
80
+ if r.returncode == 0 and r.stdout.strip():
81
+ ts = int(r.stdout.strip())
82
+ return datetime.fromtimestamp(ts, tz=timezone.utc)
83
+ except Exception:
84
+ pass
85
+ return None
86
+
87
+
88
+ def _last_used(hit_path: Path, project: Path | None) -> datetime:
89
+ if project:
90
+ t = _last_git_commit(project)
91
+ if t:
92
+ return t
93
+ # fall back to the mtime of the cache dir itself
94
+ try:
95
+ st = hit_path.stat()
96
+ return datetime.fromtimestamp(st.st_mtime, tz=timezone.utc)
97
+ except OSError:
98
+ return datetime.min.replace(tzinfo=timezone.utc)
99
+
100
+
101
# Generic directory names that only count as build output when a project
# marker is found nearby; a bare "build" or "out" folder could hold anything.
_PROJECT_ONLY = {"dist", "build", "out"}


def scan(root: Path, progress_cb=None) -> list[Hit]:
    """
    Walk root looking for cache directories. Doesn't recurse into found dirs.
    progress_cb(path) is called as we enter each directory, for spinner updates.

    Returns one Hit per cache directory, with size and last_used filled in.

    Never reported:
      * anything inside git metadata directories (names starting with ".git")
      * symlinked candidates - shutil.rmtree refuses to remove a symlink, and
        the link target may live outside root
      * "dist" / "build" / "out" when no project marker is found nearby
      * "target" unless Cargo.toml or pom.xml sits right next to it
    """
    hits = []
    # TODO: handle junction points (windows symlink variant)

    for dirpath, dirnames, _ in os.walk(root, topdown=True, onerror=None, followlinks=False):
        p = Path(dirpath)

        if progress_cb:
            progress_cb(p)

        # directories os.walk should still descend into
        keep = []
        for d in dirnames:
            # Never descend into git metadata, including ".git" itself: a
            # branch such as "build/foo" creates .git/refs/heads/build/, which
            # would otherwise be reported (and deleted) as a cache directory.
            if d.startswith(".git"):
                continue

            full = p / d

            if d in CACHE_DIRS:
                if full.is_symlink():
                    # not deletable via rmtree; os.walk won't follow it either
                    continue
                proj = _project_root(full)
                if proj is not None or d not in _PROJECT_ONLY:
                    hits.append(Hit(path=full, project=proj))
                    continue
                # generic name outside any project: not a hit, keep walking into it

            elif d in CONDITIONAL:
                # only include 'target' if a build marker is in the immediate parent
                has_marker = (p / "Cargo.toml").exists() or (p / "pom.xml").exists()
                if has_marker and not full.is_symlink():
                    hits.append(Hit(path=full, project=p))
                    continue

            keep.append(d)

        # in-place update is what tells os.walk (topdown=True) what to skip
        dirnames[:] = keep

    # fill in sizes + last-used after the walk so the spinner can run cleanly
    for h in hits:
        h.size = _dir_size(h.path)
        h.last_used = _last_used(h.path, h.project)

    return hits
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.4
2
+ Name: devcull
3
+ Version: 0.1.0
4
+ Summary: remove stale dev cache directories
5
+ License-Expression: MIT
6
+ Project-URL: Repository, https://github.com/gnomecromancer/cull
7
+ Keywords: developer-tools,cache,cleanup,node_modules
8
+ Classifier: Environment :: Console
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Utilities
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: click>=8.0
15
+ Requires-Dist: rich>=13.0
16
+
17
+ # cull
18
+
19
+ I ran WinDirStat one afternoon because my SSD was at 94% and found 74 GB of `node_modules` from projects I hadn't touched in over a year. Deleting them by hand is tedious. This does it for you.
20
+
21
+ ```
22
+ $ cull ~/projects --delete
23
+ found 23 stale cache dirs totaling 61.3 GB
24
+
25
+ # path size last commit
26
+ 1 ~/projects/old-saas/node_modules 18.2 GB 14mo ago
27
+ 2 ~/projects/prototype-v2/node_modules 9.4 GB 11mo ago
28
+ 3 ~/projects/prototype-v2/.next 2.1 GB 11mo ago
29
+ ...
30
+
31
+ > a
32
+ delete 23 dir(s) (61.3 GB)? [y/N]: y
33
+ freed 61.3 GB
34
+ ```
35
+
36
+ ## install
37
+
38
+ ```
39
+ pip install devcull
40
+ ```
41
+
42
+ ## usage
43
+
44
+ ```
45
+ cull [PATH] scan PATH (default: current dir)
46
+ --older-than DAYS only show caches untouched for N days (default: 90)
47
+ --delete interactively pick what to remove
48
+ --all delete everything found after a single confirmation prompt
49
+ --dry-run show what would go, don't touch anything
50
+ ```
51
+
52
+ ## what it looks for
53
+
54
+ - `node_modules`, `.next`, `.nuxt`, `.svelte-kit`, `.parcel-cache`, `.turbo`
55
+ - `.venv`, `venv`, `.virtualenv`, `.tox`
56
+ - `__pycache__`, `.pytest_cache`, `.mypy_cache`, `.ruff_cache`
57
+ - `dist`, `build`, `out` (inside project directories)
58
+ - `.gradle`, `.angular`, `.sass-cache`
59
+ - `target` (only inside Rust or Maven projects)
60
+
61
+ ## what it won't do
62
+
63
+ It won't scan for "large files" generically. It won't suggest you delete your Downloads folder or anything outside the above list. The whole point is that it only removes things you can safely recreate by running `npm install` or `pip install` again.
64
+
65
+ It also won't run automatically or add itself to your startup. You run it when you want to run it.
66
+
67
+ ## "last commit" column
68
+
69
+ cull tries to find the last git commit in the parent project. If there's no git repo, it falls back to the directory's modification time. The commit date is more reliable — copying files around updates mtime but doesn't change when you actually worked on the project.
70
+
71
+ ## license
72
+
73
+ MIT
@@ -0,0 +1,9 @@
1
+ cull/__init__.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
2
+ cull/__main__.py,sha256=tBonSz1D22vdWC05SzTjRxMyJGHE07Upl-Wsciig7Lo,31
3
+ cull/cli.py,sha256=0GNCS0HsCILZ3HFyPhWBdMireRqQUUrBynaI_dnsVJo,5786
4
+ cull/scan.py,sha256=S6eg4Zz6Yr2gSfKoi0rVeP1Ck-OrTtXF6_wpiwglgAI,4415
5
+ devcull-0.1.0.dist-info/METADATA,sha256=p6HRmTZkN1Uw7TdMXge4uxf6pwfFrTSZXGa5AlPYC1k,2624
6
+ devcull-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
7
+ devcull-0.1.0.dist-info/entry_points.txt,sha256=WW4h2nPFJjgc74nd6oPUaBYsf3HGfmkPa_IVil9gSSA,38
8
+ devcull-0.1.0.dist-info/top_level.txt,sha256=mnLdGAQAr6_XdZQHse8OrLmQM8PWEa20JMh5ZAeNrls,5
9
+ devcull-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cull = cull.cli:cli
@@ -0,0 +1 @@
1
+ cull