git-reaper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- git_reaper/__init__.py +8 -0
- git_reaper/__main__.py +6 -0
- git_reaper/_version.py +24 -0
- git_reaper/art.py +60 -0
- git_reaper/cache.py +131 -0
- git_reaper/cli.py +340 -0
- git_reaper/core/__init__.py +1 -0
- git_reaper/core/harvest.py +81 -0
- git_reaper/core/provenance.py +27 -0
- git_reaper/core/pulse.py +75 -0
- git_reaper/core/source.py +70 -0
- git_reaper/core/tree.py +58 -0
- git_reaper/formatters/__init__.py +2 -0
- git_reaper/formatters/jsonfmt.py +22 -0
- git_reaper/formatters/markdown.py +81 -0
- git_reaper/fsutil.py +99 -0
- git_reaper/gitio/__init__.py +11 -0
- git_reaper/gitio/backend.py +46 -0
- git_reaper/gitio/subprocess_git.py +102 -0
- git_reaper/ignore.py +60 -0
- git_reaper/models.py +123 -0
- git_reaper/schemas.py +97 -0
- git_reaper/theme.py +51 -0
- git_reaper-0.1.0.dist-info/METADATA +149 -0
- git_reaper-0.1.0.dist-info/RECORD +28 -0
- git_reaper-0.1.0.dist-info/WHEEL +4 -0
- git_reaper-0.1.0.dist-info/entry_points.txt +3 -0
- git_reaper-0.1.0.dist-info/licenses/LICENSE +21 -0
git_reaper/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""git-reaper: reap structured knowledge from repositories."""
|
|
2
|
+
|
|
3
|
+
# Prefer the version string written by the build into git_reaper._version;
# fall back to a placeholder when that generated module cannot be imported.
try:
    from git_reaper._version import __version__
except ImportError:  # no VCS metadata and no build hook output; should not happen in installs
    __version__ = "0.0.0"
|
|
7
|
+
|
|
8
|
+
__all__ = ["__version__"]
|
git_reaper/__main__.py
ADDED
git_reaper/_version.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"__version__",
|
|
7
|
+
"__version_tuple__",
|
|
8
|
+
"version",
|
|
9
|
+
"version_tuple",
|
|
10
|
+
"__commit_id__",
|
|
11
|
+
"commit_id",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
version: str
|
|
15
|
+
__version__: str
|
|
16
|
+
__version_tuple__: tuple[int | str, ...]
|
|
17
|
+
version_tuple: tuple[int | str, ...]
|
|
18
|
+
commit_id: str | None
|
|
19
|
+
__commit_id__: str | None
|
|
20
|
+
|
|
21
|
+
__version__ = version = '0.1.0'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 0)
|
|
23
|
+
|
|
24
|
+
__commit_id__ = commit_id = None
|
git_reaper/art.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""The skull gallery: banners, tombstones, and spinner frames.
|
|
2
|
+
|
|
3
|
+
Everything here is decoration. Every caller must honor --plain / NO_COLOR
|
|
4
|
+
by simply not calling into this module (see theme.theme_enabled).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import random
|
|
10
|
+
|
|
11
|
+
HERO_SKULL = r"""
|
|
12
|
+
______
|
|
13
|
+
.-" "-.
|
|
14
|
+
/ \
|
|
15
|
+
|, .-. .-. ,|
|
|
16
|
+
| )(_o/ \o_)( |
|
|
17
|
+
|/ /\ \|
|
|
18
|
+
(_ ^^ _)
|
|
19
|
+
\__|IIIIII|__/
|
|
20
|
+
| \IIIIII/ |
|
|
21
|
+
\ /
|
|
22
|
+
`--------`
|
|
23
|
+
g i t - r e a p e r
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
# Chosen automatically for skinny terminals.
|
|
27
|
+
NARROW_SKULL = r"""
|
|
28
|
+
.-.
|
|
29
|
+
(o.o)
|
|
30
|
+
|=|
|
|
31
|
+
git-reaper
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
MINI_SKULL = ".-.\n|x|\n'-'"
|
|
35
|
+
|
|
36
|
+
SCYTHE_FRAMES = ["/", "-", "\\", "|"]
|
|
37
|
+
|
|
38
|
+
TOMBSTONE_DIVIDER = " _______\n | RIP |\n_|_______|_"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def banner(version: str, width: int = 80) -> str:
    """Return the CLI banner: a skull chosen by terminal width, then the version.

    Widths of 40 columns or more get HERO_SKULL; anything narrower gets
    NARROW_SKULL. Trailing whitespace of the art is stripped before the
    version line is appended.
    """
    if width >= 40:
        chosen = HERO_SKULL
    else:
        chosen = NARROW_SKULL
    art_body = chosen.rstrip()
    return f"{art_body}\n v{version}\n"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def tombstone(lines: list[str]) -> str:
    """Render lines of text inside ASCII tombstone art.

    Args:
        lines: Epitaph lines, drawn centered below an "R I P" header and a
            blank spacer row. May be empty.

    Returns:
        The tombstone as one newline-joined string (no trailing newline).
    """
    longest = max((len(line) for line in lines), default=0)
    # Every row pads its text with one space on each side, so the interior has
    # to be two columns wider than the longest line; otherwise that line would
    # push its right-hand border past the frame. 11 is the minimum interior.
    inner = max(longest + 2, 11)
    top = " " + "_" * inner
    body = [f" /{' ' * inner}\\"]
    body.extend(f" | {line.center(inner - 2)} |" for line in ["R I P", "", *lines])
    body.append(" ___|" + "_" * inner + "|___")
    return "\n".join([top, *body])
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def boo() -> str:
    """Return one randomly chosen gallery piece (backs the hidden `reaper boo`)."""
    gallery = [HERO_SKULL, NARROW_SKULL, MINI_SKULL, TOMBSTONE_DIVIDER]
    return random.choice(gallery)
|
git_reaper/cache.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""The catacombs: the clone cache.
|
|
2
|
+
|
|
3
|
+
Remote clones land in a content-addressed cache under
|
|
4
|
+
``~/.cache/git-reaper/catacombs/<host>/<owner>/<repo>``, shallow by default,
|
|
5
|
+
reused across runs, and cleared by ``banish``. Local ``file://`` sources are
|
|
6
|
+
buried flat as ``localhost/<name>-<digest>`` to stay inside Windows path
|
|
7
|
+
limits.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import hashlib
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
import time
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from urllib.parse import urlparse
|
|
18
|
+
|
|
19
|
+
from git_reaper import fsutil
|
|
20
|
+
from git_reaper.models import BanishResult, CacheEntry
|
|
21
|
+
|
|
22
|
+
_SCP_RE = re.compile(r"^(?:\w+@)?(?P<host>[\w.-]+):(?P<path>.+)$")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def catacombs_root() -> Path:
    """Return the directory that holds the clone cache.

    A non-empty GIT_REAPER_CACHE wins outright (useful for tests and CI).
    Otherwise the root is ``git-reaper/catacombs`` under XDG_CACHE_HOME, or
    under ``~/.cache`` when XDG_CACHE_HOME is unset or empty.
    """
    env = os.environ
    explicit = env.get("GIT_REAPER_CACHE")
    if explicit:
        return Path(explicit)
    base = env.get("XDG_CACHE_HOME")
    if not base:
        base = str(Path.home() / ".cache")
    return Path(base).joinpath("git-reaper", "catacombs")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _sanitize(part: str) -> str:
|
|
35
|
+
part = part.strip("/").removesuffix(".git")
|
|
36
|
+
return re.sub(r"[^\w.-]", "_", part) or "_"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def grave_path(url: str) -> Path:
    """Map a repository URL to its directory inside the catacombs.

    ``file://`` URLs are stored flat as ``localhost/<basename>-<digest>``;
    other URLs with a scheme, and scp-style ``[user@]host:path`` strings,
    become ``<host>/<segment>/...`` with every component sanitized.

    Raises:
        ValueError: If the URL carries no repository path, or has no scheme
            and does not look like an scp-style address.
    """
    pieces = urlparse(url)
    scheme = pieces.scheme

    if scheme == "file":
        # Tolerate Windows spellings (file://C:\repos\x): backslashes never
        # delimit for urlparse, so the drive letter lands in netloc.
        local = pieces.path.replace("\\", "/")
        drive = pieces.netloc
        if re.match(r"^[A-Za-z]:", drive):
            local = drive.replace("\\", "/") + local
        local = local.strip("/")
        if not local:
            raise ValueError(f"URL has no repository path: {url!r}")
        # Local paths can be arbitrarily deep; mirroring them under the
        # catacombs would breach Windows' 260-char path limit. Bury them
        # flat: basename plus a short digest of the full path.
        short_hash = hashlib.sha256(local.encode("utf-8")).hexdigest()[:12]
        basename = _sanitize(local.rsplit("/", 1)[-1])
        return catacombs_root() / "localhost" / f"{basename}-{short_hash}"

    if scheme:
        host = pieces.netloc or "localhost"
        repo_path = pieces.path
    else:
        found = _SCP_RE.match(url)
        if not found:
            raise ValueError(f"cannot read this incantation as a repo URL: {url!r}")
        host = found.group("host")
        repo_path = found.group("path")

    parts = []
    for raw in repo_path.strip("/").split("/"):
        if raw:
            parts.append(_sanitize(raw))
    if not parts:
        raise ValueError(f"URL has no repository path: {url!r}")
    return catacombs_root() / _sanitize(host) / Path(*parts)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _dir_size(path: Path) -> int:
|
|
71
|
+
return sum(f.stat().st_size for f in path.rglob("*") if f.is_file())
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def list_graves() -> list[CacheEntry]:
    """List every cached repository, least recently used first.

    A repository is any directory under the cache root that contains an entry
    named ``.git``. Its URL comes from the ``.git-reaper-url`` marker (empty
    string when the marker is missing) and its last-used time is the
    directory's mtime. Returns an empty list when the cache root is absent.
    """
    root = catacombs_root()
    graves: list[CacheEntry] = []
    if not root.is_dir():
        return graves

    for dot_git in sorted(root.rglob(".git")):
        plot = dot_git.parent
        url_file = plot / ".git-reaper-url"
        if url_file.is_file():
            origin = url_file.read_text(encoding="utf-8").strip()
        else:
            origin = ""
        grave = CacheEntry(
            path=str(plot),
            url=origin,
            size_bytes=_dir_size(plot),
            last_used=plot.stat().st_mtime,
        )
        graves.append(grave)

    # Stable sort: entries with equal timestamps keep their path order.
    graves.sort(key=lambda grave: grave.last_used)
    return graves
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def mark_grave(repo_path: Path, url: str) -> None:
    """Write the source URL marker into *repo_path* and bump the directory's timestamps."""
    marker = repo_path / ".git-reaper-url"
    marker.write_text(f"{url}\n", encoding="utf-8")
    # No explicit times: os.utime sets access/modification time to "now".
    os.utime(repo_path)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def banish(older_than_seconds: float | None = None) -> BanishResult:
    """Delete cached repositories and report what happened.

    Args:
        older_than_seconds: When given, only entries last used at or before
            ``now - older_than_seconds`` are deleted; newer ones are kept.
            When None, every entry is a candidate.

    Returns:
        A BanishResult listing removed and kept entries and the bytes
        reclaimed from the removed ones.
    """
    outcome = BanishResult()
    if older_than_seconds is None:
        threshold = None
    else:
        threshold = time.time() - older_than_seconds

    for grave in list_graves():
        too_fresh = threshold is not None and grave.last_used > threshold
        if too_fresh:
            outcome.kept.append(grave)
            continue
        try:
            fsutil.force_rmtree(grave.path)
        except OSError:
            # A grave we cannot dig up (locked file?) is kept, not "removed".
            outcome.kept.append(grave)
        else:
            outcome.removed.append(grave)
            outcome.reclaimed_bytes += grave.size_bytes
    return outcome
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
# re.ASCII keeps IGNORECASE to plain ASCII letters. Without it, "[smhdw]" also
# matches U+017F (LATIN SMALL LETTER LONG S), whose lower() is not a key of
# _AGE_UNITS, so parse_age would leak a KeyError instead of a ValueError.
_AGE_RE = re.compile(r"^\s*(\d+)\s*([smhdw])\s*$", re.IGNORECASE | re.ASCII)
# Seconds per unit suffix.
_AGE_UNITS = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}


def parse_age(text: str) -> float:
    """Parse an age such as '7d', '12h' or '90m' into seconds.

    Args:
        text: A whole number followed by one unit letter (s, m, h, d, w,
            case-insensitive). Whitespace around either part is ignored.

    Returns:
        The age in seconds.

    Raises:
        ValueError: If *text* is not of that form.
    """
    match = _AGE_RE.match(text)
    if match is None:
        raise ValueError(f"unreadable age: {text!r} (try '7d', '12h', '30m')")
    value, unit = match.groups()
    return int(value) * _AGE_UNITS[unit.lower()]
|
git_reaper/cli.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
"""The CLI face of the reaper: a thin Typer adapter over git_reaper.core.
|
|
2
|
+
|
|
3
|
+
Rules of the house:
|
|
4
|
+
- Artifacts go to --out or stdout. Narration goes to stderr, always.
|
|
5
|
+
- Every themed message still carries the plain cause and a next step.
|
|
6
|
+
- Exit codes: 0 rest in peace, 1 the ritual failed, 2 bad incantation.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import shlex
|
|
13
|
+
import sys
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
import typer
|
|
18
|
+
from rich.console import Console
|
|
19
|
+
from rich.markup import escape
|
|
20
|
+
from rich.table import Table
|
|
21
|
+
|
|
22
|
+
from git_reaper import __version__, art, cache, fsutil, schemas
|
|
23
|
+
from git_reaper.core import harvest as harvest_core
|
|
24
|
+
from git_reaper.core import pulse as pulse_core
|
|
25
|
+
from git_reaper.core import tree as tree_core
|
|
26
|
+
from git_reaper.core.source import resolve_source
|
|
27
|
+
from git_reaper.formatters import jsonfmt, markdown
|
|
28
|
+
from git_reaper.gitio import GitError
|
|
29
|
+
from git_reaper.theme import make_console, theme_enabled
|
|
30
|
+
|
|
31
|
+
app = typer.Typer(
|
|
32
|
+
name="reaper",
|
|
33
|
+
help="A spooky utility for data mining git repositories.",
|
|
34
|
+
no_args_is_help=True,
|
|
35
|
+
context_settings={"help_option_names": ["-h", "--help"]},
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class State:
    """Settings shared by all commands; the root callback `main` fills them in."""

    # True when --plain / --no-theme was passed.
    plain: bool = False
    verbosity: int = 0  # -1 whisper, 0 default, 1 moan, 2 shriek
    # Placeholder until `main` assigns the console built by make_console().
    console: Console = None  # type: ignore[assignment]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
state = State()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _say(style: str, message: str, level: int = 0) -> None:
    """Print one narration line on the shared console if verbosity is at least *level*.

    The line starts with the style name shown literally in brackets and drawn
    in that style; *message* is escaped so it is never read as Rich markup.
    """
    if state.verbosity < level:
        return
    tag = f"[{style}]\\[{style}][/{style}]"
    state.console.print(f"{tag} {escape(message)}")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _die(message: str, hint: str | None = None) -> typer.Exit:
    """Narrate a failure and return the exit exception for the caller to raise.

    Args:
        message: The plain cause of the failure.
        hint: Optional next step for the user, narrated on its own line.

    Returns:
        ``typer.Exit(code=1)``; callers write ``raise _die(...)``.
    """
    # level=-1: -q/--whisper promises "Only errors." and sets verbosity to -1,
    # so failure narration at the default level 0 would be muted entirely.
    _say("blood", f"the ritual failed: {message}", level=-1)
    if hint:
        _say("ash", f"next step: {hint}", level=-1)
    return typer.Exit(code=1)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _invocation() -> str:
|
|
63
|
+
return "reaper " + " ".join(shlex.quote(a) for a in sys.argv[1:])
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _emit(text: str, out: Path | None) -> None:
|
|
67
|
+
"""Write an artifact to --out or stdout. Chatter never comes here."""
|
|
68
|
+
if out:
|
|
69
|
+
out.parent.mkdir(parents=True, exist_ok=True)
|
|
70
|
+
out.write_text(text, encoding="utf-8")
|
|
71
|
+
else:
|
|
72
|
+
sys.stdout.write(text)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _print_schema(command: str) -> None:
    """Write the JSON schema registered for *command* to stdout, indented by two spaces."""
    model = schemas.COMMAND_MODELS[command]
    rendered = json.dumps(schemas.schema_for(model), indent=2)
    sys.stdout.write(rendered + "\n")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _validate_format(fmt: str) -> None:
    """Accept only 'md' or 'json'; anything else is narrated and raised as a failure exit."""
    if fmt in ("md", "json"):
        return
    raise _die(f"unknown format {fmt!r}", "use --format md or --format json")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _version_callback(value: bool) -> None:
    """Option callback for --version: print the version to stdout and exit; no-op when unset."""
    if not value:
        return
    sys.stdout.write(f"git-reaper {__version__}\n")
    raise typer.Exit()
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Root callback: copies the global flags into the shared `state` object that
# the commands read.
@app.callback()
def main(
    plain: bool = typer.Option(
        False, "--plain", "--no-theme", help="Clean ASCII output; no color, no art."
    ),
    whisper: bool = typer.Option(False, "-q", "--whisper", help="Only errors."),
    # NOTE(review): the help text advertises --shriek, but only -v/--moan are
    # declared on this option; confirm whether a --shriek flag was intended.
    verbose: int = typer.Option(
        0, "-v", "--moan", count=True, help="More narration; -vv (--shriek) for debug."
    ),
    version: bool = typer.Option(
        False, "--version", callback=_version_callback, is_eager=True, help="Print version."
    ),
) -> None:
    state.plain = plain
    # --whisper wins over any -v count.
    state.verbosity = -1 if whisper else verbose
    state.console = make_console(plain=plain, quiet=False)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _banner() -> None:
    """Print the mini skull when theming is enabled and verbosity is not below 0."""
    if not theme_enabled(state.plain) or state.verbosity < 0:
        return
    skull = f"[eldritch]{art.MINI_SKULL}[/eldritch]"
    state.console.print(skull, highlight=False)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# --------------------------------------------------------------------------
|
|
115
|
+
# harvest (reap)
|
|
116
|
+
# --------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _harvest_impl(
    source: str,
    pattern: list[str],
    exclude: list[str],
    out: Path | None,
    ref: str | None,
    depth: int,
    max_file_size: str | None,
    max_total_size: str | None,
    include_binary: bool,
) -> None:
    """Gather files matching a pattern and concatenate them into one artifact.

    Args:
        source: Local path or repo URL, handed to resolve_source.
        pattern: Globs to gather; empty means harvest_core.DEFAULT_PATTERNS.
        exclude: Globs to skip.
        out: Destination file, or None to write the artifact to stdout.
        ref: Ref passed through to resolve_source.
        depth: Clone depth passed through to resolve_source.
        max_file_size: Per-file cap as a size string, or None for no cap.
        max_total_size: Total cap as a size string, or None for no cap.
        include_binary: Passed through to the harvest core.

    Raises:
        typer.Exit: With code 1 when a size string is unreadable, the source
            cannot be resolved, or the total size cap is exceeded.
    """
    _banner()
    try:
        file_cap = fsutil.parse_size(max_file_size) if max_file_size else None
        total_cap = fsutil.parse_size(max_total_size) if max_total_size else None
    except ValueError as exc:
        raise _die(str(exc)) from exc

    try:
        resolved = resolve_source(source, ref=ref, depth=depth)
    except (FileNotFoundError, ValueError, GitError) as exc:
        raise _die(str(exc), "check the path or URL; `reaper pulse` checks your setup") from exc
    if resolved.cached:
        _say("necro", f"catacombs hit: {resolved.repo.source} already interred, reusing")

    patterns = tuple(pattern) if pattern else harvest_core.DEFAULT_PATTERNS
    try:
        result = harvest_core.harvest(
            resolved.repo,
            patterns=patterns,
            excludes=exclude,
            max_file_size=file_cap,
            max_total_size=total_cap,
            include_binary=include_binary,
            invoked=_invocation(),
        )
    except harvest_core.CapExceeded as exc:
        raise _die(str(exc)) from exc

    _say(
        "necro",
        f"gathered {len(result.files)} souls ({', '.join(patterns)}) ... "
        f"{result.total_lines:,} lines, {fsutil.human_size(result.total_bytes)}",
    )
    for skipped in result.skipped:
        _say("ember", f"skipped {skipped.path}: {skipped.skip_reason}", level=0)

    if out:
        # Create missing parent directories first, exactly as _emit does for
        # the other commands; otherwise --out into a new directory would fail.
        out.parent.mkdir(parents=True, exist_ok=True)
        with out.open("w", encoding="utf-8") as fh:
            markdown.write_harvest(result, fh)
        _say(
            "bone",
            f"wrote {out} ({len(result.files)} files, ~{result.token_estimate:,} tokens)",
        )
    else:
        markdown.write_harvest(result, sys.stdout)
    _say("ash", "the reaping is complete.")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# The docstring below doubles as the command's help text; keep it verbatim.
@app.command("harvest")
def harvest_cmd(
    source: str | None = typer.Argument(None, help="Local path or repo URL."),
    pattern: list[str] = typer.Option(
        [], "--pattern", "--glob", "-p", help="Glob(s) to gather (default: *.md)."
    ),
    exclude: list[str] = typer.Option([], "--exclude", "-x", help="Glob(s) to skip."),
    out: Path | None = typer.Option(None, "--out", "-o", help="Output file (default stdout)."),
    ref: str | None = typer.Option(None, "--ref", help="Branch, tag, or sha (remote sources)."),
    depth: int = typer.Option(1, "--depth", help="Clone depth for remote sources."),
    max_file_size: str | None = typer.Option(
        None, "--max-file-size", help="Skip files larger than this (e.g. 1MB)."
    ),
    max_total_size: str | None = typer.Option(
        None, "--max-total-size", help="Abort past this total (e.g. 100MB)."
    ),
    include_binary: bool = typer.Option(False, "--include-binary", help="Do not skip binaries."),
    schema: bool = typer.Option(False, "--schema", help="Print the JSON schema and exit."),
) -> None:
    """Gather files matching a pattern into one flat artifact."""
    # --schema short-circuits everything else, including the source check.
    if schema:
        _print_schema("harvest")
        return
    if source is None:
        raise _die("no source given", "pass a local path or a repo URL")
    _harvest_impl(
        source=source,
        pattern=pattern,
        exclude=exclude,
        out=out,
        ref=ref,
        depth=depth,
        max_file_size=max_file_size,
        max_total_size=max_total_size,
        include_binary=include_binary,
    )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# --------------------------------------------------------------------------
|
|
218
|
+
# tree (map)
|
|
219
|
+
# --------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# The docstring below doubles as the command's help text; keep it verbatim.
@app.command("tree")
def tree_cmd(
    source: str = typer.Argument(".", help="Local path or repo URL."),
    depth: int | None = typer.Option(None, "--depth", "-d", help="Max depth."),
    dirs_only: bool = typer.Option(False, "--dirs-only", help="Directories only."),
    sizes: bool = typer.Option(False, "--sizes", help="Show file sizes."),
    lines: bool = typer.Option(False, "--lines", help="Show line counts."),
    exclude: list[str] = typer.Option([], "--exclude", "-x", help="Glob(s) to skip."),
    fmt: str = typer.Option("md", "--format", "-f", help="md or json."),
    out: Path | None = typer.Option(None, "--out", "-o", help="Output file (default stdout)."),
    ref: str | None = typer.Option(None, "--ref", help="Branch, tag, or sha (remote sources)."),
    schema: bool = typer.Option(False, "--schema", help="Print the JSON schema and exit."),
) -> None:
    """Emit a hierarchical file listing."""
    if schema:
        _print_schema("tree")
        return

    # Reject a bad --format before any banner or source resolution happens.
    _validate_format(fmt)
    _banner()
    try:
        resolved = resolve_source(source, ref=ref)
    except (FileNotFoundError, ValueError, GitError) as exc:
        raise _die(str(exc), "check the path or URL; `reaper pulse` checks your setup") from exc

    listing = tree_core.tree(
        resolved.repo,
        max_depth=depth,
        dirs_only=dirs_only,
        with_sizes=sizes,
        with_lines=lines,
        excludes=exclude,
        invoked=_invocation(),
    )
    _say("necro", f"mapped {listing.dir_count} crypts, {listing.file_count} souls")

    if fmt == "json":
        rendered = jsonfmt.render(listing)
    else:
        rendered = markdown.render_tree(listing, with_sizes=sizes, with_lines=lines)
    _emit(rendered, out)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# --------------------------------------------------------------------------
|
|
263
|
+
# pulse (doctor)
|
|
264
|
+
# --------------------------------------------------------------------------
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# The docstring below doubles as the command's help text; keep it verbatim.
@app.command("pulse")
def pulse_cmd(
    fmt: str = typer.Option("md", "--format", "-f", help="md or json."),
    schema: bool = typer.Option(False, "--schema", help="Print the JSON schema and exit."),
) -> None:
    """Signs-of-life check: git, extras, cache health."""
    if schema:
        _print_schema("pulse")
        return
    _validate_format(fmt)

    report = pulse_core.pulse()
    if fmt == "json":
        _emit(jsonfmt.render(report), None)
    else:
        board = Table(title="signs of life", title_style="eldritch", border_style="grave")
        board.add_column("check", style="bone")
        board.add_column("", justify="center")
        board.add_column("detail", style="ash")
        for probe in report.checks:
            if probe.ok:
                verdict = "[necro]ok[/necro]"
            else:
                verdict = "[blood]DEAD[/blood]"
            # escape: details like "[git] extra" are text, not Rich markup
            board.add_row(probe.name, verdict, escape(probe.detail))
        state.console.print(board)

    # Both output formats end the same way: a failed check means exit code 1.
    if report.ok:
        _say("necro", "there is a pulse. faint, but there.")
        return
    _say("blood", "the patient is unwell; fix the DEAD rows above")
    raise typer.Exit(code=1)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# --------------------------------------------------------------------------
|
|
297
|
+
# banish (purge)
|
|
298
|
+
# --------------------------------------------------------------------------
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# The docstring below doubles as the command's help text; keep it verbatim.
@app.command("banish")
def banish_cmd(
    older_than: str | None = typer.Option(
        None, "--older-than", help="Only clear graves older than this (e.g. 7d, 12h)."
    ),
    schema: bool = typer.Option(False, "--schema", help="Print the JSON schema and exit."),
) -> None:
    """Clear the catacombs (the clone cache)."""
    if schema:
        _print_schema("banish")
        return
    _banner()

    # No --older-than (or an empty one) means no age limit.
    age = None
    if older_than:
        try:
            age = cache.parse_age(older_than)
        except ValueError as exc:
            raise _die(str(exc)) from exc

    outcome = cache.banish(older_than_seconds=age)
    for grave in outcome.removed:
        _say("ember", f"banished {grave.url or grave.path}", level=1)
    summary = (
        f"banished {len(outcome.removed)} graves, kept {len(outcome.kept)}, "
        f"reclaimed {fsutil.human_size(outcome.reclaimed_bytes)}"
    )
    _say("necro", summary)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
# --------------------------------------------------------------------------
|
|
328
|
+
# easter egg
|
|
329
|
+
# --------------------------------------------------------------------------
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
# The docstring below doubles as the command's help text; keep it verbatim.
@app.command("boo", hidden=True)
def boo_cmd() -> None:
    """A random piece from the gallery."""
    # Gate on theme_enabled, the same check _banner uses, instead of only
    # --plain: the art module asks every caller to honor --plain / NO_COLOR
    # by not calling into it when theming is off.
    if theme_enabled(state.plain):
        state.console.print(f"[eldritch]{art.boo()}[/eldritch]", highlight=False)
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def run() -> None:  # pragma: no cover - console-script shim
    """Invoke the Typer app."""
    app()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""The engine. Everything here returns models, never formatted strings."""
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""The flagship: gather files matching patterns and prepare them for
|
|
2
|
+
concatenation. Returns a HarvestResult; rendering lives in formatters/."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import fnmatch
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from git_reaper import fsutil, schemas
|
|
10
|
+
from git_reaper.core.provenance import make_provenance
|
|
11
|
+
from git_reaper.ignore import IgnoreMatcher, walk_files
|
|
12
|
+
from git_reaper.models import FileEntry, HarvestResult, RepoRef
|
|
13
|
+
|
|
14
|
+
DEFAULT_PATTERNS = ("*.md",)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CapExceeded(RuntimeError):
    """The total size cap was hit. The message says exactly where.

    Raised by harvest() when adding the next file would push the running
    total past max_total_size.
    """
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _matches(rel_path: str, patterns: tuple[str, ...]) -> bool:
|
|
22
|
+
name = rel_path.rsplit("/", 1)[-1]
|
|
23
|
+
return any(fnmatch.fnmatch(name, pat) or fnmatch.fnmatch(rel_path, pat) for pat in patterns)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def harvest(
    repo: RepoRef,
    patterns: tuple[str, ...] = DEFAULT_PATTERNS,
    excludes: list[str] | None = None,
    max_file_size: int | None = None,
    max_total_size: int | None = None,
    include_binary: bool = False,
    invoked: str = "reaper harvest",
    generated: str | None = None,
) -> HarvestResult:
    """Collect every file under the repo root that matches *patterns*.

    Files over *max_file_size*, and binary files unless *include_binary* is
    set, are not gathered but recorded in ``skipped`` with a reason, so
    nothing is dropped silently.

    Raises:
        CapExceeded: When the next file would push the running byte total
            past *max_total_size*.
    """
    base = Path(repo.path)
    ignore = IgnoreMatcher(base, extra_excludes=excludes)
    stamp = make_provenance(schemas.artifact_schema("harvest"), repo, invoked, generated)
    gathered = HarvestResult(provenance=stamp, root=str(base))

    for candidate in walk_files(base, ignore):
        rel = candidate.relative_to(base).as_posix()
        if not _matches(rel, patterns):
            continue
        nbytes = candidate.stat().st_size

        # The per-file cap is checked before the binary sniff.
        reason = None
        if max_file_size is not None and nbytes > max_file_size:
            reason = f"over size cap ({fsutil.human_size(nbytes)})"
        elif not include_binary and fsutil.is_binary(candidate):
            reason = "binary"
        if reason is not None:
            gathered.skipped.append(
                FileEntry(path=rel, size_bytes=nbytes, skipped=True, skip_reason=reason)
            )
            continue

        if max_total_size is not None and gathered.total_bytes + nbytes > max_total_size:
            raise CapExceeded(
                f"total size cap {fsutil.human_size(max_total_size)} reached at {rel}; "
                "raise --max-total-size or narrow the pattern"
            )

        kept = FileEntry(path=rel, size_bytes=nbytes, line_count=fsutil.count_lines(candidate))
        gathered.files.append(kept)
        gathered.total_bytes += nbytes
        gathered.total_lines += kept.line_count

    # Totals are final now; mirror the summary figures into the provenance stamp.
    tokens = fsutil.estimate_tokens(gathered.total_bytes)
    gathered.token_estimate = tokens
    gathered.provenance.files = len(gathered.files)
    gathered.provenance.token_estimate = tokens
    return gathered
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Provenance stamps: every combined artifact says where it came from."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
|
|
7
|
+
from git_reaper import __version__
|
|
8
|
+
from git_reaper.models import Provenance, RepoRef
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def make_provenance(
    schema: str,
    repo: RepoRef,
    invoked: str,
    generated: str | None = None,
) -> Provenance:
    """Build the provenance stamp for an artifact.

    `generated` is injectable so tests stay deterministic; when it is missing
    or empty the current UTC time is used, formatted as
    ``YYYY-MM-DDTHH:MM:SSZ``. It is the only wall-clock value allowed
    anywhere in an artifact.
    """
    if generated:
        stamp = generated
    else:
        stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    return Provenance(
        schema=schema,
        source=repo.source,
        ref=repo.ref,
        sha=repo.sha,
        generated=stamp,
        tool_version=__version__,
        invoked=invoked,
    )
|