dug-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dug/hooks.py ADDED
@@ -0,0 +1,112 @@
1
+ """Git hook installer — writes post-commit and post-checkout hooks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import stat
7
+ from pathlib import Path
8
+
9
# Shell snippet shared by both hook scripts below. It defines `_dug_cmd`, which
# runs dug through the first launcher found on PATH (dug, then uvx, then pipx).
# If none is found it prints an install hint to stderr and exits 0, so the git
# operation that triggered the hook is not failed.
_RESOLVE_DUG = """\
# Resolve dug — works whether installed via pipx, brew, uv, or binary
_dug_cmd() {
if command -v dug >/dev/null 2>&1; then
dug "$@"
elif command -v uvx >/dev/null 2>&1; then
uvx dug-cli "$@"
elif command -v pipx >/dev/null 2>&1; then
pipx run dug-cli "$@"
else
echo "[dug] dug not found on PATH. Install with: pipx install dug-cli" >&2
exit 0 # exit 0 so git commit still succeeds
fi
}
"""

# post-commit hook body: shebang + marker comment + resolver + the update call.
# The "# dug:" comment on the second line is what _DUG_MARKER detects.
_POST_COMMIT = """\
#!/bin/sh
# dug: reindex files changed in this commit
""" + _RESOLVE_DUG + """
_dug_cmd update --changed-only
"""

# post-checkout hook body. The script reads its three positional arguments as
# the previous HEAD, the new HEAD and a flag, and only reindexes when the flag
# equals "1".
_POST_CHECKOUT = """\
#!/bin/sh
# dug: reindex files that differ after a branch switch
""" + _RESOLVE_DUG + """
PREV_HEAD="$1"
NEW_HEAD="$2"
IS_BRANCH="$3"
if [ "$IS_BRANCH" = "1" ]; then
_dug_cmd update --branch-switch --from="$PREV_HEAD" --to="$NEW_HEAD"
fi
"""

# Substring used to detect whether a hook file already contains a dug block.
_DUG_MARKER = "# dug:"
45
+
46
+
47
# Interpreters whose scripts can safely have the POSIX sh dug block appended.
_SHELL_INTERPRETERS = frozenset({"sh", "bash", "dash", "ash", "ksh", "zsh"})


def _is_shell_hook(text: str) -> bool:
    """Return True when *text* looks like a script we may append sh code to."""
    first_line = text.split("\n", 1)[0].strip()
    if not first_line.startswith("#!"):
        # No interpreter declared: keep the original append behaviour.
        return True
    parts = first_line[2:].split()
    if not parts:
        return True
    interpreter = parts[0].rsplit("/", 1)[-1]
    if interpreter == "env":
        # "#!/usr/bin/env [-S] bash": the interpreter is the first non-flag word.
        target = next((p for p in parts[1:] if not p.startswith("-")), "")
        interpreter = target.rsplit("/", 1)[-1]
    return interpreter in _SHELL_INTERPRETERS


def _write_hook(hook_path: Path, content: str) -> str:
    """Append the dug block to an existing hook or create a new hook file.

    Args:
        hook_path: Location of the hook script inside the hooks directory.
        content: Full dug hook script (shell) to write or append.

    Returns:
        A status string: ``"already installed"`` when the dug marker is already
        present, ``"skipped: existing hook is not a shell script"`` when the
        existing hook declares a non-shell interpreter, ``"appended to existing
        hook"`` or ``"created"``.
    """
    if hook_path.exists():
        existing = hook_path.read_text()
        if _DUG_MARKER in existing:
            return "already installed"
        if not _is_shell_hook(existing):
            # Appending sh code to e.g. a Python or Node hook would break it.
            return "skipped: existing hook is not a shell script"
        # Keep the "\n\n" separator: uninstall removes exactly this suffix.
        hook_path.write_text(existing.rstrip() + "\n\n" + content)
        return "appended to existing hook"

    hook_path.write_text(content)
    # A freshly created hook needs the executable bits set.
    executable_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    hook_path.chmod(hook_path.stat().st_mode | executable_bits)
    return "created"
62
+
63
+
64
def install_git_hooks(root: Path) -> dict[str, str]:
    """Write the dug post-commit and post-checkout hooks under ``root/.git/hooks``.

    Returns a mapping of hook name to the status reported by ``_write_hook``,
    or a single ``"error"`` entry when the hooks directory does not exist.
    """
    hooks_dir = root.joinpath(".git", "hooks")
    if hooks_dir.exists():
        scripts = (
            ("post-commit", _POST_COMMIT),
            ("post-checkout", _POST_CHECKOUT),
        )
        return {
            hook_name: _write_hook(hooks_dir / hook_name, script)
            for hook_name, script in scripts
        }
    return {"error": "not a git repo or .git/hooks missing"}
74
+
75
+
76
def ensure_gitignore(root: Path) -> str:
    """Add ``.dug/`` to ``root/.gitignore`` unless an equivalent entry exists.

    The file is read and written as UTF-8 regardless of the platform locale, so
    the non-ASCII header comment is always encoded the same way and existing
    non-ASCII content cannot abort the check.

    Args:
        root: Repository root directory.

    Returns:
        ``"created .gitignore"``, ``"added to existing .gitignore"`` or
        ``"already in .gitignore"``.
    """
    gitignore = root / ".gitignore"
    entry = ".dug/"
    header = "# dug local index — machine-specific, never commit"
    # Patterns that already ignore the .dug directory, including root-anchored forms.
    covering = {entry, ".dug", "**/.dug/", "**/.dug", "/.dug/", "/.dug"}

    if not gitignore.exists():
        gitignore.write_text(f"{header}\n{entry}\n", encoding="utf-8")
        return "created .gitignore"

    # errors="replace": the text is only inspected, so undecodable bytes in a
    # legacy-encoded file must not raise.
    existing = gitignore.read_text(encoding="utf-8", errors="replace")
    if any(line.strip() in covering for line in existing.splitlines()):
        return "already in .gitignore"

    # The leading newline terminates a last line that lacks one.
    with open(gitignore, "a", encoding="utf-8") as f:
        f.write(f"\n{header}\n{entry}\n")
    return "added to existing .gitignore"
93
+
94
+
95
def uninstall_git_hooks(root: Path) -> dict[str, str]:
    """Remove the dug block from hooks, leaving other hook content intact.

    Args:
        root: Repository root directory.

    Returns:
        A mapping of hook name to status: ``"not found"`` (no hook file),
        ``"not installed"`` (no dug marker), ``"removed"``, or a
        ``"not removed: ..."`` message when the marker is present but the
        installed block no longer matches the text dug wrote.
    """
    hooks_dir = root / ".git" / "hooks"
    results: dict[str, str] = {}
    for name, content in [("post-commit", _POST_COMMIT), ("post-checkout", _POST_CHECKOUT)]:
        hook_path = hooks_dir / name
        if not hook_path.exists():
            results[name] = "not found"
            continue
        existing = hook_path.read_text()
        if _DUG_MARKER not in existing:
            results[name] = "not installed"
            continue

        # Try the appended form first (with its blank-line separator), then the
        # stand-alone form used when dug created the file.
        cleaned = existing.replace("\n\n" + content, "").replace(content, "")
        if cleaned == existing:
            # Exact match failed, so nothing was removed; do not claim success.
            results[name] = "not removed: dug block was modified, edit the hook manually"
            continue

        if cleaned.strip():
            hook_path.write_text(cleaned)
        else:
            # The hook held nothing but the dug block: drop the file instead of
            # leaving an empty executable behind.
            hook_path.unlink()
        results[name] = "removed"
    return results
dug/indexer.py ADDED
@@ -0,0 +1,294 @@
1
+ """Indexer — full init and incremental per-file updates."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ import subprocess
8
+ from pathlib import Path
9
+
10
+ from .config import get_dug_dir, load_config
11
+ from .graph import (CodeGraph, build_graph, walk_repo, _ext_to_lang,
12
+ extract_symbols_ripgrep, extract_imports,
13
+ _resolve_import_to_file)
14
+ from .chunker import extract_chunks, Chunk
15
+ from .vector_store import get_or_create_table, upsert_chunks, delete_file_chunks
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Persistence helpers
20
+ # ---------------------------------------------------------------------------
21
+
22
def get_hashes_path() -> Path:
    """Return the path of ``hashes.json`` inside the dug directory."""
    dug_dir = get_dug_dir()
    return dug_dir / "hashes.json"
24
+
25
+
26
def get_chunk_cache_path() -> Path:
    """Return the path of ``chunk_cache.json`` inside the dug directory."""
    dug_dir = get_dug_dir()
    return dug_dir / "chunk_cache.json"
28
+
29
+
30
def load_hashes() -> dict[str, str]:
    """Load the stored hash mapping, or return an empty dict if the file is absent."""
    hashes_file = get_hashes_path()
    if hashes_file.exists():
        with open(hashes_file) as fh:
            return json.load(fh)
    return {}
36
+
37
+
38
def save_hashes(hashes: dict[str, str]) -> None:
    """Write *hashes* to the hashes file as indented JSON, creating its directory."""
    target = get_hashes_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w") as fh:
        json.dump(hashes, fh, indent=2)
43
+
44
+
45
def load_chunk_cache() -> dict[str, list[float]]:
    """Load the chunk cache JSON, or return an empty dict if the file is absent."""
    cache_file = get_chunk_cache_path()
    if cache_file.exists():
        with open(cache_file) as fh:
            return json.load(fh)
    return {}
51
+
52
+
53
def save_chunk_cache(cache: dict[str, list[float]]) -> None:
    """Write *cache* to the chunk cache file as JSON, creating its directory."""
    target = get_chunk_cache_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w") as fh:
        json.dump(cache, fh)
58
+
59
+
60
def file_hash(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the bytes stored at *path*."""
    digest = hashlib.md5()
    digest.update(path.read_bytes())
    return digest.hexdigest()
62
+
63
+
64
def needs_reindex(path: Path, hashes: dict[str, str]) -> bool:
    """Return True when the current hash of *path* differs from the one in *hashes*.

    A path with no recorded hash always needs reindexing.
    """
    current = file_hash(path)
    recorded = hashes.get(str(path))
    return current != recorded
66
+
67
+
68
+ # ---------------------------------------------------------------------------
69
+ # Embedding helper
70
+ # ---------------------------------------------------------------------------
71
+
72
+ def _embed_chunks(chunks: list[Chunk], embedder, cache: dict) -> list[dict]:
73
+ rows = []
74
+ for chunk in chunks:
75
+ code_hash = hashlib.md5(chunk.code.encode()).hexdigest()
76
+ if code_hash in cache:
77
+ vector = cache[code_hash]
78
+ else:
79
+ vector = embedder.embed(chunk.code)
80
+ cache[code_hash] = vector
81
+ rows.append({
82
+ "chunk_id": chunk.chunk_id,
83
+ "file_path": chunk.file_path,
84
+ "function_name": chunk.function_name,
85
+ "start_line": chunk.start_line,
86
+ "end_line": chunk.end_line,
87
+ "language": chunk.language,
88
+ "vector": vector,
89
+ })
90
+ return rows
91
+
92
+
93
+ # ---------------------------------------------------------------------------
94
+ # Full init (rebuilds everything)
95
+ # ---------------------------------------------------------------------------
96
+
97
def run_init(root: Path | None = None, embedder=None, progress: bool = True) -> dict:
    """Rebuild the structural graph and the semantic index for the repository.

    Args:
        root: Repository root; resolved with ``find_repo_root()`` when falsy.
        embedder: Object exposing ``embed(code)``; created from the config when None.
        progress: Print an in-place progress counter while embedding.

    Returns:
        The graph statistics merged with ``chunks``, ``cache_hits`` and
        ``embedded`` counts.
    """
    from .config import find_repo_root
    root = root or find_repo_root()
    config = load_config()

    # Phase 1: build and persist the structural graph.
    graph = build_graph(root, config)
    graph.save(get_dug_dir() / "graph.json")

    # Phase 2: semantic index.
    if embedder is None:
        from .embeddings import get_embedder
        embedder = get_embedder(config)

    cache = load_chunk_cache()
    table = get_or_create_table(
        get_dug_dir() / "embeddings",
        config.get("embedding_mode", "local"),
    )

    files = walk_repo(root, config.get("ignore_paths", []), config.get("languages", []))
    all_chunks: list[Chunk] = [
        chunk for source_file in files for chunk in extract_chunks(source_file, root)
    ]
    total = len(all_chunks)

    rows = []
    cache_hits = 0
    for position, chunk in enumerate(all_chunks, start=1):
        if progress:
            print(f"\r Embedding functions... {position}/{total}", end="", flush=True)
        digest = hashlib.md5(chunk.code.encode()).hexdigest()
        if digest in cache:
            cache_hits += 1
            vector = cache[digest]
        else:
            vector = embedder.embed(chunk.code)
            cache[digest] = vector
        rows.append({
            "chunk_id": chunk.chunk_id,
            "file_path": chunk.file_path,
            "function_name": chunk.function_name,
            "start_line": chunk.start_line,
            "end_line": chunk.end_line,
            "language": chunk.language,
            "vector": vector,
        })

    # Finish the in-place progress line.
    if progress and all_chunks:
        print()

    # Drop the stored chunks of every walked file, then insert the fresh rows.
    for source_file in files:
        delete_file_chunks(table, str(source_file.relative_to(root)))
    upsert_chunks(table, rows)
    save_chunk_cache(cache)

    # Record content hashes so incremental updates can skip unchanged files.
    save_hashes({str(source_file): file_hash(source_file) for source_file in files})

    summary = dict(graph.stats())
    summary["chunks"] = total
    summary["cache_hits"] = cache_hits
    summary["embedded"] = total - cache_hits
    return summary
162
+
163
+
164
+ # ---------------------------------------------------------------------------
165
+ # Single-file incremental update
166
+ # ---------------------------------------------------------------------------
167
+
168
def update_file(file_path: Path, root: Path, embedder=None) -> dict:
    """Reindex one file: clean up stale data, re-extract and re-embed.

    Args:
        file_path: Path of the file to reindex; it may no longer exist.
        root: Repository root that *file_path* is relative to.
        embedder: Object exposing ``embed(code)``; created from the config
            when None and the file exists.

    Returns:
        ``{"skipped": True, "path": rel}`` when the file exists and its content
        hash is unchanged, otherwise ``{"updated": rel, "chunks": n}`` where
        ``n`` is 0 for a file that no longer exists.
    """
    config = load_config()
    rel_path = str(file_path.relative_to(root))

    # Guard 1: skip if content unchanged
    hashes = load_hashes()
    if file_path.exists() and not needs_reindex(file_path, hashes):
        return {"skipped": True, "path": rel_path}

    # Load graph
    graph_path = get_dug_dir() / "graph.json"
    graph = CodeGraph()
    graph.load(graph_path)

    # Current file set for import resolution, including the file being updated.
    all_file_rels: set[str] = {
        d["path"]
        for _, d in graph.g.nodes(data=True)
        if d.get("kind") == "FILE"
    }
    all_file_rels.add(rel_path)

    # Stale cleanup + re-add to graph
    graph.update_file_data(file_path, root, all_file_rels)
    graph.save(graph_path)

    # Vector store: delete old chunks for this file, insert new ones below.
    db_path = get_dug_dir() / "embeddings"
    table = get_or_create_table(db_path, config.get("embedding_mode", "local"))
    delete_file_chunks(table, rel_path)

    chunk_count = 0
    if file_path.exists():
        if embedder is None:
            from .embeddings import get_embedder
            embedder = get_embedder(config)

        cache = load_chunk_cache()
        chunks = extract_chunks(file_path, root)
        rows = _embed_chunks(chunks, embedder, cache)
        if rows:
            upsert_chunks(table, rows)
        save_chunk_cache(cache)

        # Update hash
        hashes[str(file_path)] = file_hash(file_path)
        save_hashes(hashes)
        chunk_count = len(chunks)
    elif hashes.pop(str(file_path), None) is not None:
        # The file is gone and its chunks were just deleted. Forget its hash
        # too, otherwise restoring the file with identical content would hit
        # the unchanged-content guard and never be indexed again.
        save_hashes(hashes)

    return {"updated": rel_path, "chunks": chunk_count}
218
+
219
+
220
+ # ---------------------------------------------------------------------------
221
+ # Git-driven multi-file update
222
+ # ---------------------------------------------------------------------------
223
+
224
def _git_changed_files(root: Path, base: str = "HEAD~1", head: str = "HEAD") -> list[Path]:
    """Return paths (joined onto *root*) of files changed between two git refs.

    Only files whose suffix belongs to one of the configured languages are
    returned. An empty list is returned when git is not installed or the diff
    command fails (for example when *base* does not exist).

    Args:
        root: Repository root; used as the working directory for git.
        base: Older ref of the comparison.
        head: Newer ref of the comparison.
    """
    import os

    try:
        # -z emits NUL-separated, unquoted paths, so names with spaces, quotes
        # or non-ASCII characters survive intact. The trailing "--" keeps the
        # refs from being interpreted as paths.
        result = subprocess.run(
            ["git", "diff", "--name-only", "-z", base, head, "--"],
            capture_output=True, cwd=root,
        )
    except FileNotFoundError:
        return []
    if result.returncode != 0:
        return []

    config = load_config()
    valid_exts: set[str] = set()
    from .graph import LANG_EXTENSIONS
    for lang in config.get("languages", []):
        valid_exts.update(LANG_EXTENSIONS.get(lang, []))

    paths = []
    for raw_name in result.stdout.split(b"\0"):
        if not raw_name:
            continue  # the output ends with a NUL terminator
        # Decode with the filesystem encoding so undecodable bytes round-trip.
        candidate = root / os.fsdecode(raw_name)
        if candidate.suffix in valid_exts:
            paths.append(candidate)
    return paths
248
+
249
+
250
def update_changed_files(
    root: Path,
    embedder=None,
    from_ref: str = "HEAD~1",
    to_ref: str = "HEAD",
    progress: bool = True,
) -> dict:
    """Reindex only files that changed between two git refs and prune deleted ones.

    Args:
        root: Repository root.
        embedder: Object exposing ``embed(code)``; created from the config when
            None and there is at least one changed file.
        from_ref: Older git ref of the comparison.
        to_ref: Newer git ref of the comparison.
        progress: Print an in-place progress line while updating.

    Returns:
        A dict with ``pruned`` (the value returned by ``prune_stale_nodes``),
        ``updated`` and ``skipped`` lists of relative paths.
    """
    graph_path = get_dug_dir() / "graph.json"
    graph = CodeGraph()
    graph.load(graph_path)

    # Prune deleted files first
    config = load_config()
    db_path = get_dug_dir() / "embeddings"
    table = get_or_create_table(db_path, config.get("embedding_mode", "local"))

    stale = graph.prune_stale_nodes(root)
    for rel in stale:
        delete_file_chunks(table, rel)
    if stale:
        graph.save(graph_path)
        # Forget the hashes of pruned files as well; otherwise a file restored
        # later with identical content would be skipped as "unchanged" even
        # though its chunks are gone.
        # NOTE(review): assumes entries of `stale` are root-relative paths, as
        # their use with delete_file_chunks suggests, and that hash keys are
        # str(root / rel) as written by update_file.
        hashes = load_hashes()
        dropped = [hashes.pop(str(root / rel), None) for rel in stale]
        if any(value is not None for value in dropped):
            save_hashes(hashes)

    # Reindex changed files
    changed = _git_changed_files(root, from_ref, to_ref)
    if not changed:
        return {"pruned": stale, "updated": [], "skipped": []}

    if embedder is None:
        from .embeddings import get_embedder
        embedder = get_embedder(config)

    updated, skipped = [], []
    for i, f in enumerate(changed):
        if progress:
            print(f"\r Updating {i + 1}/{len(changed)}: {f.name} ", end="", flush=True)
        result = update_file(f, root, embedder)
        if result.get("skipped"):
            skipped.append(str(f.relative_to(root)))
        else:
            updated.append(result["updated"])

    # Finish the in-place progress line (changed is non-empty here).
    if progress:
        print()

    return {"pruned": stale, "updated": updated, "skipped": skipped}
dug/prompt_builder.py ADDED
@@ -0,0 +1,106 @@
1
+ """Prompt builder — assembles the Claude Code prompt. Pure Python, zero LLM."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime
6
+ from pathlib import Path
7
+
8
+ from .retriever import RankedFile
9
+ from .git_context import Commit
10
+
11
+
12
+ def _ago(ts: float) -> str:
13
+ if not ts:
14
+ return "unknown"
15
+ dt = datetime.datetime.fromtimestamp(ts)
16
+ delta = datetime.datetime.now() - dt
17
+ if delta.days == 0:
18
+ hours = delta.seconds // 3600
19
+ return f"{hours}h ago" if hours > 0 else "just now"
20
+ if delta.days == 1:
21
+ return "1 day ago"
22
+ return f"{delta.days} days ago"
23
+
24
+
25
+ def _commit_ago(commit: Commit) -> str:
26
+ return f"{commit.days_ago}d ago"
27
+
28
+
29
def build_prompt(
    bug_input: str,
    ranked_files: list[RankedFile],
    git_commits: list[Commit],
    signals: dict,
) -> str:
    """Assemble the markdown bug-report prompt from retrieval results.

    Args:
        bug_input: The error text supplied by the user.
        ranked_files: Candidate files, most relevant first.
        git_commits: Recent commits; up to three are listed, preferring those
            that touch a ranked file.
        signals: Extracted hints; ``line_numbers`` and ``error_type`` are read.

    Returns:
        The prompt text with files, import chain, commits and starting point.
    """
    has_files = bool(ranked_files)

    def describe_file(ranked) -> str:
        reasons = ", ".join(ranked.reasons) if ranked.reasons else "semantic match"
        return f" - {ranked.path} ({reasons}, modified {_ago(ranked.last_modified)})"

    def describe_commit(commit) -> str:
        return f" {commit.hash[:7]}: \"{commit.message}\" ({_commit_ago(commit)})"

    # --- Files section ---
    file_lines = [describe_file(ranked) for ranked in ranked_files]
    files_section = "\n".join(file_lines) if file_lines else " (none found)"

    # --- Import chain of the top-ranked file ---
    chain = ranked_files[0].import_chain if has_files else []
    if len(chain) > 1:
        chain_section = " → ".join(chain)
    elif chain:
        chain_section = chain[0]
    else:
        chain_section = "n/a"

    # --- Commits: prefer those touching ranked files, else the most recent ---
    ranked_paths = {ranked.path for ranked in ranked_files}
    touching = [
        commit for commit in git_commits
        if any(touched in ranked_paths for touched in commit.files_touched)
    ][:3]
    if touching:
        shown_commits = touching
    elif git_commits:
        shown_commits = git_commits[:3]
    else:
        shown_commits = []
    if shown_commits:
        commits_section = "\n".join(describe_commit(commit) for commit in shown_commits)
    else:
        commits_section = " (no git history found)"

    # --- Starting point ---
    if not has_files:
        start_section = "No clear starting point found."
    else:
        start = ranked_files[0]
        imports_str = "no tracked imports"
        if start.imports:
            imports_str = ", ".join(start.imports[:3])
        line_hint = ""
        if signals.get("line_numbers"):
            line_hint = f" (line {signals['line_numbers'][0]} mentioned in input)"
        start_section = (
            f"Begin at {start.path}{line_hint}.\n"
            f" Modified {_ago(start.last_modified)}. "
            f"Imports: {imports_str}."
        )

    # --- Error type hint ---
    error_hint = ""
    if signals.get("error_type"):
        error_hint = f"\n**Error type:** `{signals['error_type']}`\n"

    return f"""## Bug Report

**Error:** {bug_input}
{error_hint}
**Files to investigate (ranked by relevance):**
{files_section}

**Import chain:**
{chain_section}

**Recent commits touching these files:**
{commits_section}

**Suggested starting point:**
{start_section}
"""