dug-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dug/__init__.py ADDED
File without changes
dug/__main__.py ADDED
@@ -0,0 +1,297 @@
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ import click
5
+
6
+ from .config import load_config, save_config, set_config_value, get_dug_dir, DEFAULTS, find_repo_root
7
+
8
+
9
class DefaultToQueryGroup(click.Group):
    """Click group that falls back to the ``query`` subcommand.

    When the first argument is neither an option nor a registered
    subcommand, ``query`` is inserted in front of the argument list, so
    ``dug "some error"`` behaves like ``dug query "some error"``.
    """

    def parse_args(self, ctx, args):
        first = args[0] if args else None
        # Options (leading "-") and known subcommands are passed through as-is.
        is_free_text = (
            first is not None
            and not first.startswith("-")
            and first not in self.commands
        )
        if is_free_text:
            args = ["query", *args]
        return super().parse_args(ctx, args)
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Helpers
20
+ # ---------------------------------------------------------------------------
21
+
22
+ LANG_EXTENSIONS = {
23
+ "python": [".py"],
24
+ "java": [".java"],
25
+ "typescript": [".ts", ".tsx"],
26
+ "javascript": [".js", ".jsx"],
27
+ }
28
+
29
+
30
+ def _detect_languages(root: Path) -> list[str]:
31
+ detected = []
32
+ for lang, exts in LANG_EXTENSIONS.items():
33
+ for ext in exts:
34
+ if any(root.rglob(f"*{ext}")):
35
+ detected.append(lang)
36
+ break
37
+ return detected or ["python"]
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # CLI
42
+ # ---------------------------------------------------------------------------
43
+
44
@click.group(cls=DefaultToQueryGroup, invoke_without_command=True)
@click.pass_context
def cli(ctx):
    """dug — dig into any bug with full codebase context."""
    # A subcommand was chosen: nothing to do at the group level.
    if ctx.invoked_subcommand is not None:
        return
    # Bare `dug` prints the usage text.
    click.echo(ctx.get_help())
50
+
51
+
52
@cli.command()
def init():
    """First-time setup — wizard + full index build."""
    click.echo("\nWelcome to dug.\n")

    # Embedding mode
    click.echo("Embedding mode:")
    click.echo(" 1. Local — no API key, runs on CPU (recommended)")
    click.echo(" 2. OpenAI — needs API key, faster")
    choice = click.prompt("\n>", default="1").strip()

    cfg = load_config()

    if choice == "2":
        # hide_input keeps the secret out of the terminal and scrollback.
        api_key = click.prompt("OpenAI API key", hide_input=True).strip()
        cfg["embedding_mode"] = "openai"
        cfg["api_key"] = api_key
        click.echo("\n✓ Using OpenAI embeddings.")
    else:
        # Any answer other than "2" selects local embeddings.
        cfg["embedding_mode"] = "local"
        cfg["api_key"] = None
        click.echo("\n✓ Using local embeddings. No API key needed.")

    # Language detection
    root = find_repo_root()
    detected = _detect_languages(root)
    click.echo(f"\nLanguages detected: {', '.join(detected)}")
    cfg["languages"] = detected

    # Ignore paths
    click.echo(f"Ignore paths: {', '.join(cfg['ignore_paths'])}")

    save_config(cfg)
    click.echo(f"\nConfig saved to {get_dug_dir() / 'config.json'}")

    # Build index
    click.echo("\nStarting initial index...")
    try:
        from .indexer import run_init
        from .embeddings import get_embedder
        from .hooks import install_git_hooks, ensure_gitignore

        gi_status = ensure_gitignore(root)
        click.echo(f" .gitignore: {gi_status}")
        embedder = get_embedder(cfg)
        stats = run_init(root, embedder=embedder)
        files = stats["nodes"].get("FILE", 0)
        symbols = stats["nodes"].get("SYMBOL", 0)
        commits = stats["nodes"].get("COMMIT", 0)
        edges = stats["edges"]
        chunks = stats.get("chunks", 0)
        click.echo(f" FILE nodes: {files}")
        click.echo(f" SYMBOL nodes: {symbols}")
        click.echo(f" COMMIT nodes: {commits}")
        click.echo(f" Total edges: {edges}")
        click.echo(f" Chunks embedded: {chunks}")

        # Install git hooks
        hook_results = install_git_hooks(root)
        if "error" not in hook_results:
            click.echo("\n Git hooks:")
            for hook, status in hook_results.items():
                click.echo(f" {hook}: {status}")
        else:
            # Surface the failure instead of dropping it; the index itself
            # was built, so this is a warning rather than a fatal error.
            click.echo(
                f"\n Git hooks not installed: {hook_results['error']}",
                err=True,
            )

        click.echo("\n✓ dug is ready. Run: dug \"your error here\"")
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero.
        click.echo(f"\n✗ Index failed: {e}", err=True)
        sys.exit(1)
119
+
120
+
121
@cli.command(name="query")
@click.argument("bug_input")
def query(bug_input):
    """Query the index with a bug or stack trace."""
    cfg = load_config()
    dug_dir = get_dug_dir()
    graph_file = dug_dir / "graph.json"

    # Without a saved graph there is nothing to search.
    if not graph_file.exists():
        click.echo("No index found. Run: dug init", err=True)
        sys.exit(1)

    # Pipeline modules are imported at call time, not at module import.
    from .graph import CodeGraph
    from .embeddings import get_embedder
    from .vector_store import get_or_create_table
    from .retriever import hybrid_search
    from .verifier import verify_files
    from .prompt_builder import build_prompt
    from .git_context import get_git_history

    graph = CodeGraph()
    graph.load(graph_file)

    embedder = get_embedder(cfg)
    table = get_or_create_table(
        dug_dir / "embeddings",
        cfg.get("embedding_mode", "local"),
    )

    file_limit = cfg.get("max_files_in_prompt", 5)
    ranked, signals = hybrid_search(
        embedder, graph, table, bug_input, top_k=file_limit
    )

    # Keep only candidates that verify_files confirms against the
    # extracted symbols / bug text.
    root = find_repo_root()
    candidate_paths = [f.path for f in ranked]
    symbols = signals.get("symbols", [])
    verified_paths = verify_files(candidate_paths, symbols, root, bug_input)
    ranked = [f for f in ranked if f.path in verified_paths]

    history_depth = cfg.get("git_history_depth", 50)
    git_commits = get_git_history(root, depth=history_depth)

    click.echo(build_prompt(bug_input, ranked, git_commits, signals))

    # Save for `dug solved`
    from .history import save_last_query

    save_last_query(bug_input, [f.path for f in ranked], signals)
166
+
167
+
168
@cli.command()
@click.option("--changed-only", is_flag=True, help="Reindex only git-changed files.")
@click.option("--branch-switch", is_flag=True, hidden=True)
@click.option("--from", "from_ref", default="HEAD~1", hidden=True)
@click.option("--to", "to_ref", default="HEAD", hidden=True)
def update(changed_only, branch_switch, from_ref, to_ref):
    """Refresh the graph and index."""
    root = find_repo_root()
    incremental = changed_only or branch_switch
    try:
        if not incremental:
            # Full rebuild path.
            click.echo("Rebuilding full index...")
            from .indexer import run_init
            from .hooks import ensure_gitignore

            ensure_gitignore(root)
            summary = run_init(root)
            file_count = summary["nodes"].get("FILE", 0)
            chunk_count = summary.get("chunks", 0)
            click.echo(f"✓ Done — {file_count} files, {chunk_count} chunks.")
        else:
            # Incremental path: only files changed between the two refs.
            from .indexer import update_changed_files

            result = update_changed_files(root, from_ref=from_ref, to_ref=to_ref)
            pruned = result.get("pruned", [])
            updated = result.get("updated", [])
            skipped = result.get("skipped", [])
            if pruned:
                click.echo(f" Pruned {len(pruned)} deleted file(s).")
            click.echo(
                f"✓ Updated {len(updated)} file(s), "
                f"skipped {len(skipped)} unchanged."
            )
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero.
        click.echo(f"✗ Update failed: {e}", err=True)
        sys.exit(1)
199
+
200
+
201
@cli.command()
def watch():
    """Start background file watcher — reindexes on save (1.5s debounce)."""
    root = find_repo_root()
    # The index lives in <repo root>/.dug, so the watcher must start from the
    # same root; using the current directory would watch only a subtree when
    # the command is run from inside a subdirectory.
    if not (root / ".dug" / "graph.json").exists():
        click.echo("No index found. Run: dug init first.", err=True)
        sys.exit(1)
    from .watcher import start_watch

    start_watch(root)
210
+
211
+
212
@cli.command()
@click.option("--files", "-f", default=None,
              help="Comma-separated file paths that contained the fix.")
def solved(files):
    """Record which files fixed the last bug — improves future rankings."""
    from .history import load_last_query, record_resolved

    last = load_last_query()
    if not last:
        click.echo("No recent query found. Run: dug \"your error\" first.", err=True)
        sys.exit(1)

    suggested = last.get("ranked_files", [])
    click.echo(f"\nLast query: \"{last['bug_input']}\"")
    click.echo("Suggested files were:")
    for path in suggested:
        click.echo(f" - {path}")

    # --files wins; otherwise ask, defaulting to the suggestions just shown.
    if files:
        raw = files
    else:
        click.echo("\nWhich files actually contained the bug? (comma-separated paths)")
        click.echo("Press Enter to accept the suggestions above, or type new paths.")
        raw = click.prompt(">", default=",".join(suggested))
    resolved = [part.strip() for part in raw.split(",") if part.strip()]

    if not resolved:
        click.echo("No files recorded.", err=True)
        sys.exit(1)

    record_resolved(last["bug_input"], resolved, last.get("signals", {}))

    click.echo("\n✓ Saved. These files will rank higher for similar errors next time:")
    for path in resolved:
        click.echo(f" - {path}")
246
+
247
+
248
@cli.command()
def stats():
    """Print graph stats."""
    dug_dir = get_dug_dir()
    graph_file = dug_dir / "graph.json"
    if not graph_file.exists():
        click.echo("No index found. Run: dug init", err=True)
        sys.exit(1)

    from .graph import CodeGraph
    from .vector_store import get_or_create_table

    cfg = load_config()
    graph = CodeGraph()
    graph.load(graph_file)
    summary = graph.stats()

    click.echo("\nGraph stats:")
    for kind, count in summary["nodes"].items():
        click.echo(f" {kind}: {count}")
    click.echo(f" Edges: {summary['edges']}")

    # The embedding count is best-effort: any failure prints "n/a".
    try:
        embeddings_dir = dug_dir / "embeddings"
        table = get_or_create_table(embeddings_dir, cfg.get("embedding_mode", "local"))
        click.echo(f" Chunks (embedded): {table.count_rows()}")
    except Exception:
        click.echo(" Chunks (embedded): n/a")
271
+
272
+
273
@cli.group()
def config():
    """Manage dug configuration."""


@config.command(name="set")
@click.argument("key")
@click.argument("value")
def config_set(key, value):
    """Set a config value. Example: dug config set embedding-mode openai"""
    # Dashes on the command line map to underscores in the stored key.
    normalized = key.replace("-", "_")
    set_config_value(normalized, value)
    click.echo(f"✓ {normalized} = {value}")


@config.command(name="show")
def config_show():
    """Show current config."""
    import json

    rendered = json.dumps(load_config(), indent=2)
    click.echo(rendered)
294
+
295
+
296
+ if __name__ == "__main__":
297
+ cli()
dug/chunker.py ADDED
@@ -0,0 +1,137 @@
1
+ """Tree-sitter based function/method extractor — produces chunks for embedding."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
+ from tree_sitter import Language, Parser, Node
10
+
11
+ import tree_sitter_python as tspython
12
+ import tree_sitter_java as tsjava
13
+ import tree_sitter_javascript as tsjavascript
14
+ import tree_sitter_typescript as tstypescript
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # Language parsers
18
+ # ---------------------------------------------------------------------------
19
+
20
# Grammar objects, one per supported language key.
_LANGUAGES: dict[str, Language] = {
    key: Language(grammar)
    for key, grammar in (
        ("python", tspython.language()),
        ("java", tsjava.language()),
        ("javascript", tsjavascript.language()),
        ("typescript", tstypescript.language_typescript()),
        ("tsx", tstypescript.language_tsx()),
    )
}

# File suffix -> key into _LANGUAGES / _FUNCTION_NODE_TYPES.
_EXT_TO_LANG: dict[str, str] = dict(
    [
        (".py", "python"),
        (".java", "java"),
        (".js", "javascript"),
        (".jsx", "javascript"),
        (".ts", "typescript"),
        (".tsx", "tsx"),
    ]
)

# Node types that represent callable units worth embedding
_JS_CALLABLE_NODES = (
    "function_declaration",
    "method_definition",
    "arrow_function",
    "function_expression",
)
_FUNCTION_NODE_TYPES: dict[str, set[str]] = {
    "python": {"function_definition", "decorated_definition"},
    "java": {"method_declaration", "constructor_declaration"},
    "javascript": set(_JS_CALLABLE_NODES),
    # TypeScript and TSX add interface/abstract method signatures.
    "typescript": {*_JS_CALLABLE_NODES, "method_signature"},
    "tsx": {*_JS_CALLABLE_NODES, "method_signature"},
}

# Size window (in characters) for a snippet to be kept as a chunk.
MIN_CHUNK_CHARS = 30
MAX_CHUNK_CHARS = 8000
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # Chunk dataclass
55
+ # ---------------------------------------------------------------------------
56
+
57
@dataclass
class Chunk:
    """One function/method extracted from a source file, ready for embedding."""

    # md5 hex digest of "<file_path>:<function_name>:<start_line>"
    chunk_id: str
    # path relative to the repo root
    file_path: str
    function_name: str
    # first line of the snippet, 1-indexed
    start_line: int
    # last line of the snippet, 1-indexed
    end_line: int
    # source text of the function/method
    code: str
    # language key such as "python" or "tsx"
    language: str
66
+
67
+
68
+ def _make_chunk_id(file_path: str, name: str, start_line: int) -> str:
69
+ key = f"{file_path}:{name}:{start_line}"
70
+ return hashlib.md5(key.encode()).hexdigest()
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # AST walker
75
+ # ---------------------------------------------------------------------------
76
+
77
+ def _get_function_name(node: Node, code_bytes: bytes, language: str) -> str:
78
+ """Extract the best available name for a function/method node."""
79
+ # decorated_definition wraps the actual function — recurse one level
80
+ if node.type == "decorated_definition":
81
+ for child in node.children:
82
+ if child.type == "function_definition":
83
+ return _get_function_name(child, code_bytes, language)
84
+
85
+ name_node = node.child_by_field_name("name")
86
+ if name_node:
87
+ return code_bytes[name_node.start_byte:name_node.end_byte].decode(errors="replace")
88
+
89
+ # arrow functions often have no name — use parent context if available
90
+ return "<anonymous>"
91
+
92
+
93
def _walk(node: Node, code_bytes: bytes, language: str,
          target_types: set[str], results: list[Chunk], file_path: str) -> None:
    """Collect a ``Chunk`` for every qualifying function/method under ``node``.

    Nodes are visited in pre-order (document order) and chunks are appended
    to ``results``. A node qualifies when its type is in ``target_types``,
    its name is not ``"<anonymous>"`` and its source length lies within
    ``MIN_CHUNK_CHARS..MAX_CHUNK_CHARS``.

    Two safeguards:

    * A ``function_definition`` directly wrapped by a ``decorated_definition``
      is not emitted on its own when the wrapper type is also targeted; the
      wrapper chunk already contains it (decorators included), and emitting
      both would index the same function twice.
    * Traversal uses an explicit stack, so deeply nested syntax trees cannot
      exhaust Python's recursion limit.
    """
    pending = [node]
    while pending:
        current = pending.pop()

        if current.type in target_types:
            parent = current.parent
            covered_by_wrapper = (
                current.type == "function_definition"
                and parent is not None
                and parent.type == "decorated_definition"
                and "decorated_definition" in target_types
            )
            if not covered_by_wrapper:
                name = _get_function_name(current, code_bytes, language)
                code = code_bytes[current.start_byte:current.end_byte].decode(
                    errors="replace"
                )
                size_ok = MIN_CHUNK_CHARS <= len(code) <= MAX_CHUNK_CHARS
                if size_ok and name != "<anonymous>":
                    start_line = current.start_point[0] + 1  # rows are 0-based
                    results.append(Chunk(
                        chunk_id=_make_chunk_id(file_path, name, start_line),
                        file_path=file_path,
                        function_name=name,
                        start_line=start_line,
                        end_line=current.end_point[0] + 1,
                        code=code,
                        language=language,
                    ))

        # Always descend — nested functions/methods should also be extracted.
        # Reversed so that popping from the stack yields document order.
        pending.extend(reversed(current.children))
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # Public API
115
+ # ---------------------------------------------------------------------------
116
+
117
def extract_chunks(file_path: Path, root: Path) -> list[Chunk]:
    """Parse ``file_path`` with tree-sitter and return one Chunk per function/method.

    Returns an empty list when the file suffix maps to no known grammar or
    when the file cannot be read. Chunk paths are recorded relative to
    ``root``.
    """
    lang = _EXT_TO_LANG.get(file_path.suffix)
    if lang is None:
        return []
    if lang not in _LANGUAGES:
        return []

    parser = Parser(_LANGUAGES[lang])

    try:
        source = file_path.read_bytes()
    except OSError:
        # Unreadable file: treated as having no chunks.
        return []

    tree = parser.parse(source)
    relative_path = str(file_path.relative_to(root))
    wanted_types = _FUNCTION_NODE_TYPES.get(lang, set())

    collected: list[Chunk] = []
    _walk(tree.root_node, source, lang, wanted_types, collected, relative_path)
    return collected
dug/config.py ADDED
@@ -0,0 +1,77 @@
1
+ import json
2
+ import subprocess
3
+ from pathlib import Path
4
+
5
# Built-in settings; values read from .dug/config.json are layered on top.
DEFAULTS = dict(
    embedding_mode="local",
    api_key=None,
    languages=["python", "java", "typescript", "javascript"],
    ignore_paths=[
        "node_modules",
        ".git",
        "build",
        "dist",
        "vendor",
        "__pycache__",
        ".venv",
        "venv",
        ".tox",
        "eggs",
        ".eggs",
    ],
    git_history_depth=50,
    max_files_in_prompt=5,
    exclude_test_files=True,
)
14
+
15
+
16
def find_repo_root() -> Path:
    """
    Locate the project root for the current working directory.

    Asks git for the work-tree top level (`git rev-parse --show-toplevel`).
    If git is not installed or cwd is not inside a repository, the nearest
    ancestor of cwd (cwd included) that contains a .dug/ entry is used.
    Falls back to cwd when neither applies.
    """
    cwd = Path.cwd()

    try:
        probe = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True,
            text=True,
            cwd=cwd,
        )
    except FileNotFoundError:
        # git executable not available
        probe = None

    if probe is not None and probe.returncode == 0:
        return Path(probe.stdout.strip())

    # Fallback: nearest directory that already has a .dug/ entry
    for candidate in (cwd, *cwd.parents):
        if (candidate / ".dug").exists():
            return candidate

    return cwd
41
+
42
+
43
def get_dug_dir() -> Path:
    """Return the `.dug` directory path under the detected repo root."""
    repo_root = find_repo_root()
    return repo_root / ".dug"
45
+
46
+
47
def get_config_path() -> Path:
    """Return the path of `config.json` inside the `.dug` directory."""
    dug_dir = get_dug_dir()
    return dug_dir / "config.json"
49
+
50
+
51
def load_config() -> dict:
    """Return the effective configuration.

    Starts from a deep copy of ``DEFAULTS`` and overlays the values stored
    in the config file, when that file exists. The deep copy matters:
    ``DEFAULTS`` contains lists (``languages``, ``ignore_paths``), and a
    shallow copy would let a caller that mutates the returned config change
    the module-level defaults too.

    Raises:
        json.JSONDecodeError: if the config file is not valid JSON.
    """
    import copy

    path = get_config_path()
    defaults = copy.deepcopy(DEFAULTS)
    if not path.exists():
        return defaults
    # JSON text is UTF-8 by specification; do not depend on the locale.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    return {**defaults, **data}
58
+
59
+
60
def save_config(cfg: dict) -> None:
    """Write ``cfg`` as indented JSON to the config file.

    The parent directory is created first if it does not exist.
    """
    target = get_config_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w") as handle:
        handle.write(json.dumps(cfg, indent=2))
65
+
66
+
67
def set_config_value(key: str, value: str) -> None:
    """Store one setting, converting the CLI string to a suitable type.

    Conversion rules, applied in order:

    * ``"null"`` (any case) becomes ``None``.
    * ``"true"`` / ``"false"`` (any case) become booleans.
    * If the default for ``key`` is an integer (for example
      ``max_files_in_prompt``), the value is parsed as an integer, so
      consumers of the config do not receive ``"10"`` instead of ``10``.
      Text that is not a valid integer is stored unchanged.
    * If the default for ``key`` is a list (for example ``ignore_paths``),
      the value is split on commas and blank items are dropped.
    * Anything else is stored as the given string.
    """
    cfg = load_config()
    lowered = value.lower()
    default = DEFAULTS.get(key)

    if lowered == "null":
        coerced = None
    elif lowered in ("true", "false"):
        coerced = lowered == "true"
    elif isinstance(default, int) and not isinstance(default, bool):
        # bool is a subclass of int, hence the explicit exclusion above.
        try:
            coerced = int(value)
        except ValueError:
            coerced = value
    elif isinstance(default, list):
        coerced = [item.strip() for item in value.split(",") if item.strip()]
    else:
        coerced = value

    cfg[key] = coerced
    save_config(cfg)
dug/embeddings.py ADDED
@@ -0,0 +1,97 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import subprocess
6
+ import sys
7
+ import warnings
8
+
9
# Quiet the HuggingFace stack. setdefault leaves any value the user has
# already exported untouched.
for _env_name, _env_value in (
    ("HF_HUB_DISABLE_PROGRESS_BARS", "1"),
    ("TOKENIZERS_PARALLELISM", "false"),
    ("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1"),
    ("HF_HUB_VERBOSITY", "error"),
):
    os.environ.setdefault(_env_name, _env_value)

for _category, _module in (
    (UserWarning, "huggingface_hub"),
    (FutureWarning, "transformers"),
):
    warnings.filterwarnings("ignore", category=_category, module=_module)

# Only errors from these libraries' loggers get through.
for _noisy in ("sentence_transformers", "huggingface_hub", "transformers",
               "torch", "tokenizers"):
    logging.getLogger(_noisy).setLevel(logging.ERROR)
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Dependency installer
23
+ # ---------------------------------------------------------------------------
24
+
25
+ _LOCAL_DEPS = ["sentence-transformers"]
26
+ _OPENAI_DEPS = ["openai"]
27
+
28
+
29
+ def _ensure_installed(packages: list[str], label: str) -> None:
30
+ """Check if packages are importable; pip-install them if not."""
31
+ import importlib
32
+ missing = []
33
+ for pkg in packages:
34
+ module = pkg.replace("-", "_").split("[")[0]
35
+ try:
36
+ importlib.import_module(module)
37
+ except ImportError:
38
+ missing.append(pkg)
39
+
40
+ if not missing:
41
+ return
42
+
43
+ print(f"\n[dug] {label} dependencies not found: {', '.join(missing)}")
44
+ print(f"[dug] Installing (one-time download)...\n")
45
+
46
+ try:
47
+ subprocess.check_call(
48
+ [sys.executable, "-m", "pip", "install", "--quiet", *missing],
49
+ stdout=sys.stdout,
50
+ stderr=sys.stderr,
51
+ )
52
+ print(f"\n[dug] ✓ {label} dependencies installed.\n")
53
+ except subprocess.CalledProcessError:
54
+ print(f"\n[dug] ✗ Auto-install failed. Run manually:")
55
+ print(f" pip install {' '.join(missing)}")
56
+ sys.exit(1)
57
+
58
+
59
+ # ---------------------------------------------------------------------------
60
+ # Embedders
61
+ # ---------------------------------------------------------------------------
62
+
63
class LocalEmbedder:
    """Embedder backed by the ``all-MiniLM-L6-v2`` SentenceTransformer model."""

    def __init__(self):
        # Make sure the optional dependency is available before importing it.
        _ensure_installed(_LOCAL_DEPS, "Local embedding")
        from sentence_transformers import SentenceTransformer

        model_name = "all-MiniLM-L6-v2"
        self.model = SentenceTransformer(model_name)

    def embed(self, text: str) -> list[float]:
        """Encode ``text`` with the model and return the result as a list."""
        vector = self.model.encode(text)
        return vector.tolist()
71
+
72
+
73
class OpenAIEmbedder:
    """Embedder that calls OpenAI's ``text-embedding-3-small`` model."""

    def __init__(self, api_key: str):
        # Make sure the optional dependency is available before importing it.
        _ensure_installed(_OPENAI_DEPS, "OpenAI")
        from openai import OpenAI

        self.client = OpenAI(api_key=api_key)

    def embed(self, text: str) -> list[float]:
        """Request an embedding for ``text`` and return the first result."""
        request = {"model": "text-embedding-3-small", "input": text}
        response = self.client.embeddings.create(**request)
        first = response.data[0]
        return first.embedding
85
+
86
+
87
# One embedder instance per embedding mode, created on first use.
_cache: dict = {}


def get_embedder(config: dict) -> LocalEmbedder | OpenAIEmbedder:
    """Return the embedder for ``config["embedding_mode"]`` (default "local").

    Mode ``"openai"`` builds an ``OpenAIEmbedder`` from ``config["api_key"]``;
    every other mode builds a ``LocalEmbedder``. Instances are cached by
    mode, so later calls with the same mode reuse the first instance.
    """
    mode = config.get("embedding_mode", "local")
    try:
        return _cache[mode]
    except KeyError:
        pass

    if mode == "openai":
        embedder = OpenAIEmbedder(api_key=config["api_key"])
    else:
        embedder = LocalEmbedder()
    _cache[mode] = embedder
    return embedder
dug/git_context.py ADDED
@@ -0,0 +1,56 @@
1
+ import subprocess
2
+ from dataclasses import dataclass, field
3
+ from datetime import datetime, timezone
4
+ from pathlib import Path
5
+
6
+
7
@dataclass
class Commit:
    """A single git commit and the files it touched."""

    hash: str
    message: str
    timestamp: datetime
    # each instance gets its own list
    files_touched: list[str] = field(default_factory=list)

    @property
    def days_ago(self) -> int:
        """Whole days elapsed between the commit timestamp and now (UTC)."""
        now_utc = datetime.now(timezone.utc)
        committed_utc = self.timestamp.astimezone(timezone.utc)
        return (now_utc - committed_utc).days
18
+
19
+
20
def get_git_history(root: Path, depth: int = 50) -> list[Commit]:
    """Return up to ``depth`` recent commits of the repository at ``root``.

    Runs ``git log --name-only`` with the header format
    ``<hash>|<subject>|<ISO author date>`` and attaches each listed file
    path to the commit header that precedes it.

    A line is treated as a commit header only when it starts with a full
    hexadecimal object id followed by ``|``. The subject is everything
    between the first and the last ``|``, so subjects containing ``|`` keep
    their full text and real timestamp, and file paths containing ``|`` are
    not mistaken for commits.

    Returns:
        Commits in the order git printed them. An empty list when git is
        not installed or the command fails (for example, ``root`` is not a
        repository).
    """
    try:
        result = subprocess.run(
            ["git", "log", "--name-only", "--format=%H|%s|%aI", f"-n{depth}"],
            capture_output=True,
            text=True,
            cwd=root,
        )
    except FileNotFoundError:
        return []

    if result.returncode != 0:
        return []

    hex_digits = set("0123456789abcdef")
    commits: list[Commit] = []
    current: Commit | None = None

    for raw_line in result.stdout.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        commit_hash, first_sep, rest = line.partition("|")
        message, last_sep, stamp = rest.rpartition("|")
        is_header = (
            bool(first_sep)
            and bool(last_sep)
            and len(commit_hash) in (40, 64)  # SHA-1 or SHA-256 object id
            and set(commit_hash) <= hex_digits
        )

        if is_header:
            try:
                ts = datetime.fromisoformat(stamp)
            except ValueError:
                # Unparseable date: fall back to "now" rather than dropping
                # the commit.
                ts = datetime.now(timezone.utc)
            if current is not None:
                commits.append(current)
            current = Commit(hash=commit_hash, message=message, timestamp=ts)
        elif current is not None:
            current.files_touched.append(line)

    if current is not None:
        commits.append(current)

    return commits