dug-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dug/retriever.py ADDED
@@ -0,0 +1,249 @@
1
+ """Hybrid retriever — merges structural graph lookup with semantic search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+
9
+ # Test file patterns — excluded from ranked results by default
10
+ _TEST_PATTERNS = (
11
+ "test_", "_test.", ".test.", ".spec.", "_spec.",
12
+ "/test/", "/tests/", "/spec/", "/__tests__/",
13
+ )
14
+
15
+
16
+ def _is_test_file(path: str) -> bool:
17
+ p = path.lower().replace("\\", "/")
18
+ return any(pat in p for pat in _TEST_PATTERNS)
19
+
20
+
21
+ def _bug_tokens(text: str) -> set[str]:
22
+ """Significant words from a bug string — reuses history.py logic inline."""
23
+ text = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)
24
+ words = re.findall(r'[a-zA-Z]+', text.lower())
25
+ stopwords = {"the", "a", "an", "in", "at", "on", "is", "was", "with",
26
+ "and", "or", "for", "to", "of", "from", "that", "this"}
27
+ return {w for w in words if len(w) > 3 and w not in stopwords}
28
+
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Signal extraction
32
+ # ---------------------------------------------------------------------------
33
+
34
_ERROR_TYPES = [
    "NullPointerException", "NPE", "NullReferenceException",
    "KeyError", "TypeError", "ValueError", "AttributeError",
    "ImportError", "ModuleNotFoundError", "NameError",
    "IndexError", "RuntimeError", "AssertionError",
    "FileNotFoundError", "PermissionError", "TimeoutError",
    "ConnectionError", "HTTPError", "404", "500", "503",
    "StackOverflow", "OutOfMemoryError", "ClassNotFoundException",
]


def _error_type_pattern(name: str) -> re.Pattern[str]:
    """Build the matcher for one entry of ``_ERROR_TYPES``.

    Numeric status codes must not be embedded in a longer number ("500"
    must not match "5000"), all-caps acronyms must be whole words, and
    every other name is a case-insensitive substring so that
    "StackOverflow" still matches "StackOverflowError".
    """
    if name.isdigit():
        return re.compile(rf"(?<!\d){name}(?!\d)")
    if name.isupper():
        return re.compile(rf"\b{name}\b", re.IGNORECASE)
    return re.compile(re.escape(name), re.IGNORECASE)


# Compiled once at import time; order follows _ERROR_TYPES (first match wins).
_ERROR_PATTERNS = [(name, _error_type_pattern(name)) for name in _ERROR_TYPES]

# Longer extensions are listed first and a trailing lookahead is used so that
# ".ts" is not matched inside ".tsx", nor ".js" inside ".json".
_FILE_RE = re.compile(r'[\w/.-]+\.(?:java|py|tsx|ts|jsx|js)(?!\w)')
# "at pkg.Class.method(" — the whole dotted name is captured and split later.
_AT_FRAME_RE = re.compile(r'\bat\s+((?:\w+\.)*\w+)\s*\(')
# "in function_name" as printed by Python tracebacks.
_IN_FUNC_RE = re.compile(r'\bin\s+([a-z_]\w+)\b')
_TYPE_NAME_RE = re.compile(
    r'([A-Z]\w*(?:Service|Controller|Handler|Manager|Processor|Client|Repository|Util|Helper))'
)
_LINE_NO_RE = re.compile(r':(\d+)')


def extract_signals(bug_input: str) -> dict:
    """Pull structured signals out of a raw bug string — pure regex, no LLM.

    Returns a dict with:

    * ``files`` — de-duplicated source paths (java/py/ts/tsx/js/jsx) in
      order of appearance;
    * ``symbols`` — de-duplicated identifiers: the class (or bare function)
      of every ``at X.y(`` frame, every lower-case ``in name`` target, and
      every capitalised name ending in Service/Controller/Handler/...;
    * ``line_numbers`` — every ``:<digits>`` occurrence as ``int``;
    * ``error_type`` — the first entry of ``_ERROR_TYPES`` found in the
      text, or ``None``.
    """
    files = _FILE_RE.findall(bug_input)

    symbols: list[str] = []
    for dotted in _AT_FRAME_RE.findall(bug_input):
        parts = dotted.split(".")
        # "pkg.Class.method" -> "Class"; a bare "func" stays as it is.
        symbols.append(parts[-2] if len(parts) > 1 else parts[0])
    symbols += _IN_FUNC_RE.findall(bug_input)
    symbols += _TYPE_NAME_RE.findall(bug_input)

    error_type = next(
        (name for name, pattern in _ERROR_PATTERNS if pattern.search(bug_input)),
        None,
    )

    return {
        "files": list(dict.fromkeys(files)),      # deduped, order preserved
        "symbols": list(dict.fromkeys(symbols)),
        "line_numbers": [int(n) for n in _LINE_NO_RE.findall(bug_input)],
        "error_type": error_type,
    }
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Ranked file result
65
+ # ---------------------------------------------------------------------------
66
+
67
@dataclass
class RankedFile:
    """One entry of the ranked result list produced by ``hybrid_search``."""

    # Path of the file, as used for score keys (node id minus "file:").
    path: str
    # Combined score across all scoring layers.
    score: float
    # Human-readable explanations of where the score came from.
    reasons: list[str] = field(default_factory=list)
    # Copied from the graph node's "last_modified" attribute; 0.0 if absent.
    last_modified: float = 0.0
    # Files this file imports directly.
    imports: list[str] = field(default_factory=list)
    # Single import path walked outward from this file (starts with `path`).
    import_chain: list[str] = field(default_factory=list)
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # Scoring helpers
79
+ # ---------------------------------------------------------------------------
80
+
81
def _score_structural(
    graph, signals: dict, bug_input: str = ""
) -> tuple[dict[str, float], dict[str, list[str]]]:
    """Score files based on structural graph signals.

    Args:
        graph: project graph wrapper exposing ``g`` (node/edge store),
            ``find_file_nodes_for_symbol`` and ``get_import_neighbors``.
        signals: output of ``extract_signals`` (uses ``files``/``symbols``).
        bug_input: raw bug text, used to judge commit-message relevance.

    Returns:
        ``(scores, reasons)`` — both keyed by file path (node id without
        the ``file:`` prefix). ``reasons`` lists each distinct explanation
        once per file.
    """
    scores: dict[str, float] = {}
    reasons: dict[str, list[str]] = {}

    def add(file_id: str, pts: float, reason: str) -> None:
        path = file_id.removeprefix("file:")
        scores[path] = scores.get(path, 0.0) + pts
        path_reasons = reasons.setdefault(path, [])
        if reason not in path_reasons:
            path_reasons.append(reason)

    def names_same_file(mentioned: str, indexed: str) -> bool:
        """True when a path from the bug text refers to the indexed path.

        Compared on whole path components so "a.py" no longer matches
        "data.py". Either side may be the longer one: a trace may show just
        a file name, or an absolute path that ends with the indexed path.
        """
        mentioned = mentioned.replace("\\", "/")
        while mentioned.startswith("./"):
            mentioned = mentioned[2:]
        if not mentioned:
            return False
        return (
            indexed == mentioned
            or indexed.endswith("/" + mentioned)
            or mentioned.endswith("/" + indexed)
        )

    all_file_ids = {
        n for n, d in graph.g.nodes(data=True) if d.get("kind") == "FILE"
    }

    # +10: file directly mentioned in the bug input
    for sig_file in signals["files"]:
        for fid in all_file_ids:
            if names_same_file(sig_file, fid.removeprefix("file:")):
                add(fid, 10, "directly in stack trace")

    # +10: symbol mentioned → find file containing that symbol
    for sym in signals["symbols"]:
        for fid in graph.find_file_nodes_for_symbol(sym):
            add(fid, 10, f"contains symbol '{sym}'")

    # +5/+2: import neighbors of already-scored files.  The seed list is
    # snapshotted first because add() grows `scores` while we iterate.
    seeded = [f"file:{p}" for p in list(scores)]
    for fid in seeded:
        neighbors = graph.get_import_neighbors(fid, hops=2)
        for neighbor_id, hop in neighbors.items():
            if hop == 1:
                add(neighbor_id, 5, "1-hop import neighbor")
            else:
                add(neighbor_id, 2, "2-hop import neighbor")

    # +8 if commit message shares tokens with bug; +2 if recently modified
    # but unrelated.  Only the three most recent commits are considered.
    bug_tokens = _bug_tokens(bug_input)
    commit_nodes = [
        (n, d) for n, d in graph.g.nodes(data=True) if d.get("kind") == "COMMIT"
    ]
    # NOTE(review): the "" default assumes timestamps are strings — confirm
    # against the indexer; a numeric timestamp mixed with "" would not sort.
    recent_commits = sorted(
        commit_nodes, key=lambda x: x[1].get("timestamp", ""), reverse=True
    )[:3]
    for commit_id, commit_data in recent_commits:
        msg_tokens = _bug_tokens(commit_data.get("message", ""))
        relevant = bool(bug_tokens & msg_tokens) if bug_tokens else False
        if relevant:
            pts, label = 8, "modified in relevant recent commit"
        else:
            pts, label = 2, "modified recently (unrelated commit)"
        for neighbor in graph.g.successors(commit_id):
            if graph.g.nodes[neighbor].get("kind") == "FILE":
                add(neighbor, pts, label)

    return scores, reasons
133
+
134
+
135
+ def _score_semantic(semantic_hits: list[dict]) -> dict[str, float]:
136
+ """Convert semantic search hits to file-level scores (+0 to +5)."""
137
+ scores: dict[str, float] = {}
138
+ for hit in semantic_hits:
139
+ path = hit["file_path"]
140
+ pts = hit["score"] * 5.0 # normalize 0–1 cosine → 0–5 points
141
+ scores[path] = max(scores.get(path, 0.0), pts)
142
+ return scores
143
+
144
+
145
+ # ---------------------------------------------------------------------------
146
+ # Import chain builder
147
+ # ---------------------------------------------------------------------------
148
+
149
+ def _build_import_chain(graph, file_path: str, max_hops: int = 4) -> list[str]:
150
+ """Walk import edges outward from `file_path` and return a chain."""
151
+ chain = [file_path]
152
+ current = f"file:{file_path}"
153
+ seen = {current}
154
+ for _ in range(max_hops):
155
+ neighbors = [
156
+ n for n in graph.g.successors(current)
157
+ if graph.g.nodes[n].get("kind") == "FILE" and n not in seen
158
+ and graph.g.edges[current, n].get("rel") == "imports"
159
+ ]
160
+ if not neighbors:
161
+ break
162
+ current = neighbors[0]
163
+ seen.add(current)
164
+ chain.append(current.removeprefix("file:"))
165
+ return chain
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # Public API
170
+ # ---------------------------------------------------------------------------
171
+
172
def hybrid_search(
    embedder,
    graph,
    vector_table,
    bug_input: str,
    top_k: int = 5,
) -> tuple[list[RankedFile], dict]:
    """
    Combine structural + semantic + history signals, return ranked files + signals.

    Args:
        embedder: object whose ``embed(text)`` returns the query vector.
        graph: project graph wrapper (see ``_score_structural``).
        vector_table: table handle passed through to ``vector_store.search``.
        bug_input: raw bug report / stack trace text.
        top_k: maximum number of files returned.

    Returns:
        ``(ranked, signals)`` — up to `top_k` ``RankedFile`` objects in
        descending score order, and the dict from ``extract_signals``.
        Test files are skipped unless the bug text names them.
    """
    # Imported lazily, as in the rest of the package's entry points.
    from .history import get_error_pattern_boost, get_history_boost
    from .vector_store import search as vec_search

    signals = extract_signals(bug_input)

    # Layer 1 — structural
    struct_scores, struct_reasons = _score_structural(graph, signals, bug_input)

    # Layer 2 — semantic
    query_vector = embedder.embed(bug_input)
    semantic_hits = vec_search(vector_table, query_vector, top_k=15)
    sem_scores = _score_semantic(semantic_hits)

    # Merge layers 1 + 2
    merged: dict[str, float] = {
        path: struct_scores.get(path, 0.0) + sem_scores.get(path, 0.0)
        for path in set(struct_scores) | set(sem_scores)
    }

    # Layer 3 — history boost (+0 to +6 based on past resolutions)
    candidate_files = list(merged)
    history_boosts = get_history_boost(bug_input, signals, candidate_files)
    pattern_boosts = get_error_pattern_boost(signals.get("error_type"), candidate_files)

    history_reasons: dict[str, str] = {}
    for path, pts in history_boosts.items():
        merged[path] = merged.get(path, 0.0) + pts
        history_reasons[path] = f"resolved similar bug before (+{pts:.1f})"

    for path, pts in pattern_boosts.items():
        merged[path] = merged.get(path, 0.0) + pts
        # A history reason, when present, takes precedence over this one.
        if path not in history_reasons:
            history_reasons[path] = f"common in {signals.get('error_type')} errors (+{pts:.1f})"

    # Build RankedFile objects — skip test files unless explicitly mentioned in input
    explicitly_mentioned = {f.lower() for f in signals["files"]}
    ranked: list[RankedFile] = []
    for path, score in sorted(merged.items(), key=lambda x: x[1], reverse=True):
        if len(ranked) >= top_k:
            break
        lowered = path.lower()
        if _is_test_file(path) and not any(t in lowered for t in explicitly_mentioned):
            continue

        file_id = f"file:{path}"
        # Semantic hits can name files the graph does not know (e.g. a stale
        # vector index).  Querying successors of a missing node raises, so
        # such files are reported without graph-derived context.
        in_graph = file_id in graph.g
        node_data = graph.g.nodes.get(file_id, {})

        reasons = list(struct_reasons.get(path, []))
        sem_score = sem_scores.get(path, 0.0)
        if sem_score > 0:
            reasons.append(f"semantic match ({sem_score:.2f}/5)")
        if path in history_reasons:
            reasons.append(history_reasons[path])

        if in_graph:
            raw_imports = [
                n.removeprefix("file:")
                for n in graph.g.successors(file_id)
                if graph.g.nodes.get(n, {}).get("kind") == "FILE"
                and graph.g.edges.get((file_id, n), {}).get("rel") == "imports"
            ]
            import_chain = _build_import_chain(graph, path)
        else:
            raw_imports = []
            import_chain = [path]

        ranked.append(RankedFile(
            path=path,
            score=score,
            reasons=reasons,
            last_modified=node_data.get("last_modified", 0.0),
            imports=raw_imports,
            import_chain=import_chain,
        ))

    return ranked, signals
dug/vector_store.py ADDED
@@ -0,0 +1,79 @@
1
+ """LanceDB vector store — file-based, no server required."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import lancedb
8
+ import pyarrow as pa
9
+
10
+ # Dimension for all-MiniLM-L6-v2 (local). OpenAI text-embedding-3-small = 1536.
11
+ _DIM_LOCAL = 384
12
+ _DIM_OPENAI = 1536
13
+
14
+ TABLE_NAME = "functions"
15
+
16
+
17
def _schema(dim: int) -> pa.Schema:
    """Return the Arrow schema of the chunk table for vectors of size `dim`.

    Columns: chunk metadata (ids, path, function name, line span, language)
    followed by a ``vector`` column declared as a float32 list with list
    size `dim`.
    """
    scalar_columns = (
        ("chunk_id", pa.string()),
        ("file_path", pa.string()),
        ("function_name", pa.string()),
        ("start_line", pa.int32()),
        ("end_line", pa.int32()),
        ("language", pa.string()),
    )
    columns = [pa.field(name, dtype) for name, dtype in scalar_columns]
    columns.append(pa.field("vector", pa.list_(pa.float32(), dim)))
    return pa.schema(columns)
27
+
28
+
29
def get_or_create_table(db_path: Path, embedding_mode: str = "local") -> lancedb.table.Table:
    """Open the chunk table under `db_path`, creating it on first use.

    The directory is created if needed. A new table uses the OpenAI vector
    width when `embedding_mode` is ``"openai"`` and the local width for any
    other value.
    """
    db_path.mkdir(parents=True, exist_ok=True)
    connection = lancedb.connect(str(db_path))
    if TABLE_NAME not in connection.table_names():
        width = _DIM_OPENAI if embedding_mode == "openai" else _DIM_LOCAL
        return connection.create_table(TABLE_NAME, schema=_schema(width))
    # NOTE(review): an existing table is opened as-is; `embedding_mode` is
    # not checked against the width it was created with — confirm callers
    # rebuild the index when the mode changes.
    return connection.open_table(TABLE_NAME)
36
+
37
+
38
def delete_file_chunks(table: lancedb.table.Table, rel_path: str) -> None:
    """Remove all chunk rows belonging to a specific file.

    Deletion stays best-effort: a failure never propagates to the caller,
    but it is logged so stale chunks do not go unnoticed.
    """
    import logging

    # The delete predicate is a SQL-style string, so single quotes in the
    # path are escaped by doubling them.
    escaped = rel_path.replace("'", "''")
    try:
        table.delete(f"file_path = '{escaped}'")
    except Exception as exc:
        logging.getLogger(__name__).warning(
            "could not delete chunks for %s: %s", rel_path, exc
        )
45
+
46
+
47
def upsert_chunks(table: lancedb.table.Table, rows: list[dict]) -> None:
    """Insert `rows`, overwriting existing rows that share a ``chunk_id``.

    Does nothing when `rows` is empty.
    """
    if rows:
        # merge_insert keyed on chunk_id: update matches, insert the rest.
        merge = table.merge_insert("chunk_id")
        merge = merge.when_matched_update_all()
        merge = merge.when_not_matched_insert_all()
        merge.execute(rows)
55
+
56
+
57
def search(
    table: lancedb.table.Table,
    query_vector: list[float],
    top_k: int = 10,
) -> list[dict]:
    """Return the `top_k` nearest chunks to `query_vector` by cosine metric.

    Each hit carries the chunk's metadata columns plus ``score``, computed
    as ``1.0 - _distance`` (a row without ``_distance`` scores 1.0).
    """
    query = table.search(query_vector).metric("cosine").limit(top_k)
    copied_columns = (
        "chunk_id", "file_path", "function_name",
        "start_line", "end_line", "language",
    )
    hits = []
    for row in query.to_list():
        hit = {column: row[column] for column in copied_columns}
        hit["score"] = 1.0 - row.get("_distance", 0.0)  # cosine: distance→similarity
        hits.append(hit)
    return hits
dug/verifier.py ADDED
@@ -0,0 +1,73 @@
1
+ """Verifier — confirms candidate files are genuinely relevant via ripgrep checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+ # Minimum file size to be worth surfacing (bytes) — filters out empty/stub files
9
+ _MIN_FILE_BYTES = 50
10
+
11
+
12
+ def _rg_contains(pattern: str, abs_path: Path, fixed: bool = True) -> bool:
13
+ flags = ["--fixed-strings"] if fixed else []
14
+ try:
15
+ result = subprocess.run(
16
+ ["rg", *flags, "--quiet", pattern, str(abs_path)],
17
+ capture_output=True,
18
+ )
19
+ return result.returncode == 0
20
+ except FileNotFoundError:
21
+ return True # rg not available — assume true
22
+
23
+
24
+ def verify_files(
25
+ candidate_files: list[str],
26
+ symbols: list[str],
27
+ root: Path,
28
+ bug_input: str = "",
29
+ ) -> list[str]:
30
+ """
31
+ Multi-pass verification — drops candidates that fail all checks.
32
+
33
+ Pass 1 (always): file must exist and be non-trivially sized.
34
+ Pass 2 (when symbols extracted): file must contain at least one symbol.
35
+ Pass 3 (when no symbols): file must contain at least one significant word
36
+ from the bug input — prevents completely unrelated files surfacing.
37
+ """
38
+ confirmed = []
39
+
40
+ # Derive significant words from bug input for pass 3
41
+ import re
42
+ words = re.findall(r'[a-zA-Z]{4,}', bug_input.lower())
43
+ stopwords = {"with", "that", "this", "from", "have", "been", "when",
44
+ "error", "fail", "fails", "issue", "problem", "exception"}
45
+ sig_words = [w for w in words if w not in stopwords][:8] # top 8 words
46
+
47
+ for rel_path in candidate_files:
48
+ abs_path = root / rel_path
49
+
50
+ # Pass 1: existence + size
51
+ if not abs_path.exists():
52
+ continue
53
+ if abs_path.stat().st_size < _MIN_FILE_BYTES:
54
+ continue
55
+
56
+ # Pass 2: symbol presence (when symbols available)
57
+ if symbols:
58
+ if any(_rg_contains(sym, abs_path) for sym in symbols):
59
+ confirmed.append(rel_path)
60
+ # Don't add to confirmed if none of the symbols found
61
+ continue
62
+
63
+ # Pass 3: word presence (when no symbols — guards against totally unrelated files)
64
+ if sig_words:
65
+ if any(_rg_contains(w, abs_path, fixed=True) for w in sig_words):
66
+ confirmed.append(rel_path)
67
+ else:
68
+ confirmed.append(rel_path) # soft pass — word match is best-effort
69
+ else:
70
+ confirmed.append(rel_path) # no words to check, pass through
71
+
72
+ # Safety net: never return empty — if all dropped, return originals
73
+ return confirmed if confirmed else candidate_files
dug/watcher.py ADDED
@@ -0,0 +1,103 @@
1
+ """File watcher — OS-native filesystem events + 1.5s debounce before reindex."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ import time
7
+ from pathlib import Path
8
+
9
+ from watchdog.events import FileSystemEventHandler
10
+ from watchdog.observers import Observer
11
+
12
+ from .graph import LANG_EXTENSIONS, _should_ignore
13
+
14
DEBOUNCE_SECONDS = 1.5


class _DebounceHandler(FileSystemEventHandler):
    """Filesystem event handler that reindexes a file after a quiet period.

    Every relevant event (re)starts a per-path timer of
    ``DEBOUNCE_SECONDS``; ``update_file`` runs only once the timer expires
    without being reset.
    """

    def __init__(self, root: Path, ignore_paths: list[str],
                 valid_exts: set[str], embedder):
        self.root = root
        self.ignore_paths = ignore_paths
        self.valid_exts = valid_exts
        self.embedder = embedder
        # Pending timers keyed by str(path); guarded by _lock because
        # events and timer callbacks run on different threads.
        self._timers: dict[str, threading.Timer] = {}
        self._lock = threading.Lock()

    # watchdog fires on_modified for saves, on_created for new files,
    # on_deleted for deletions — all three need reindex
    def on_modified(self, event):
        self._handle(event)

    def on_created(self, event):
        self._handle(event)

    def on_deleted(self, event):
        self._handle(event)

    def on_moved(self, event):
        # Renames, and saves done by writing a temp file and renaming it
        # over the target, arrive only as a move: the old path must leave
        # the index and the new one must enter it.
        if event.is_directory:
            return
        self._handle_path(event.src_path)
        self._handle_path(event.dest_path)

    def _handle(self, event):
        if event.is_directory:
            return
        self._handle_path(event.src_path)

    def _handle_path(self, raw_path) -> None:
        """Schedule a reindex for `raw_path` if it is a watched source file."""
        path = Path(raw_path)
        if path.suffix not in self.valid_exts:
            return
        if _should_ignore(path, self.ignore_paths):
            return
        self._schedule(path)

    def _schedule(self, path: Path) -> None:
        """Debounce: reset the timer on every save, fire only after silence."""
        key = str(path)
        with self._lock:
            pending = self._timers.get(key)
            if pending is not None:
                pending.cancel()
            timer = threading.Timer(DEBOUNCE_SECONDS, self._reindex, args=[path])
            self._timers[key] = timer
            timer.start()

    def _reindex(self, path: Path) -> None:
        """Timer callback: reindex `path` and report the outcome on stdout."""
        from .indexer import update_file
        key = str(path)
        with self._lock:
            # A newer timer may have been registered for this path after
            # this one fired.  Only remove the entry if it is still ours
            # (the callback runs on the Timer's own thread); otherwise the
            # newer timer would become impossible to cancel.
            if self._timers.get(key) is threading.current_thread():
                del self._timers[key]
        try:
            result = update_file(path, self.root, self.embedder)
            if not result.get("skipped"):
                rel = result.get("updated", path.name)
                chunks = result.get("chunks", 0)
                print(f"\r[dug] ✓ {rel} ({chunks} chunks reindexed) ", flush=True)
        except Exception as e:
            # Runs on a timer thread: report and keep the watcher alive.
            print(f"\r[dug] ✗ error reindexing {path.name}: {e}", flush=True)
71
+
72
+
73
def start_watch(root: Path | None = None) -> None:
    """Watch the repository and reindex changed files until Ctrl+C.

    `root` defaults to the result of ``find_repo_root()``. Only files
    whose extension belongs to one of the configured ``languages`` are
    watched; ``ignore_paths`` from the config are skipped.
    """
    from .config import find_repo_root, load_config
    from .embeddings import get_embedder

    root = root or find_repo_root()
    config = load_config()

    # Union of the extensions of every configured language.
    valid_exts: set[str] = {
        ext
        for lang in config.get("languages", [])
        for ext in LANG_EXTENSIONS.get(lang, [])
    }

    handler = _DebounceHandler(
        root,
        config.get("ignore_paths", []),
        valid_exts,
        get_embedder(config),
    )

    observer = Observer()
    observer.schedule(handler, str(root), recursive=True)
    observer.start()

    print(f"[dug] watching {root}")
    print(f"[dug] debounce: {DEBOUNCE_SECONDS}s — Ctrl+C to stop")
    try:
        # Event handling happens on the observer's thread; just stay alive.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        observer.stop()
        observer.join()
        print("\n[dug] watcher stopped.")
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: dug-cli
3
+ Version: 0.1.0
4
+ Summary: Dig into any bug with full codebase context — zero LLM calls
5
+ Project-URL: Homepage, https://github.com/ratishjain12/dug
6
+ Project-URL: Repository, https://github.com/ratishjain12/dug
7
+ Project-URL: Bug Tracker, https://github.com/ratishjain12/dug/issues
8
+ Author-email: Ratish Jain <ratishjain6@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: claude,cli,code-search,debugging,developer-tools
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Debuggers
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: click
23
+ Requires-Dist: lancedb
24
+ Requires-Dist: networkx
25
+ Requires-Dist: tree-sitter-java
26
+ Requires-Dist: tree-sitter-javascript
27
+ Requires-Dist: tree-sitter-python
28
+ Requires-Dist: tree-sitter-typescript>=0.23.2
29
+ Requires-Dist: tree-sitter>=0.22
30
+ Requires-Dist: watchdog
31
+ Provides-Extra: all
32
+ Requires-Dist: openai; extra == 'all'
33
+ Requires-Dist: sentence-transformers; extra == 'all'
34
+ Provides-Extra: local
35
+ Requires-Dist: sentence-transformers; extra == 'local'
36
+ Provides-Extra: openai
37
+ Requires-Dist: openai; extra == 'openai'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # dug
41
+
42
+ **Dig into any bug with full codebase context — zero LLM calls.**
43
+
44
+ [![PyPI version](https://img.shields.io/pypi/v/dug-cli)](https://pypi.org/project/dug-cli/)
45
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
46
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue)](https://pypi.org/project/dug-cli/)
47
+
48
+ `dug` takes a bug report or stack trace and generates a structured [Claude Code](https://claude.ai/code) prompt that includes the exact files, functions, and context needed to fix it — using grep, AST parsing, and a local vector index. **No LLM is required**, and with the default local embeddings **no API calls are made** (the optional `--openai` embedding mode calls the OpenAI API).
49
+
50
+ ---
51
+
52
+ ## Install
53
+
54
+ ```sh
55
+ # Recommended
56
+ pipx install dug-cli
57
+
58
+ # macOS (Homebrew)
59
+ brew tap ratishjain12/dug
60
+ brew install dug-cli
61
+
62
+ # One-liner (Linux / macOS)
63
+ curl -fsSL https://raw.githubusercontent.com/ratishjain12/dug/main/install.sh | sh
64
+
65
+ # Inside a virtualenv
66
+ pip install dug-cli
67
+ ```
68
+
69
+ ---
70
+
71
+ ## Quick start
72
+
73
+ ```sh
74
+ # 1. Run once in your repo root to build the index
75
+ cd /your/project
76
+ dug init
77
+
78
+ # 2. Paste any bug report or stack trace
79
+ dug "NullPointerException in UserService.authenticate at line 42"
80
+
81
+ # dug prints a ready-to-paste Claude Code prompt with ranked file context
82
+ ```
83
+
84
+ **Sample output:**
85
+
86
+ ```
87
+ You are a senior engineer debugging this issue:
88
+
89
+ NullPointerException in UserService.authenticate at line 42
90
+
91
+ Relevant files (ranked by relevance):
92
+
93
+ 1. src/auth/UserService.java:35
94
+ authenticate() — modified 2 commits ago
95
+ ...
96
+
97
+ 2. src/config/AppConfig.java:12
98
+ loadConfig() — error pattern match
99
+ ...
100
+
101
+ [full function bodies + graph context follow]
102
+ ```
103
+
104
+ ---
105
+
106
+ ## How it works
107
+
108
+ `dug` builds a **local knowledge base** the first time you run `dug init`:
109
+
110
+ | Layer | What it builds | Used for |
111
+ |---|---|---|
112
+ | Structural graph | File → Symbol → Commit nodes (networkx) | Import chains, recent changes |
113
+ | Semantic index | Function embeddings in LanceDB (sentence-transformers) | Meaning-level matches |
114
+ | History log | Past bug→fix pairs | Learning from outcomes |
115
+
116
+ At query time, three signals are combined into a ranked list:
117
+
118
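+ - **Structural score** — the file is named in the bug text, defines a mentioned symbol, is an import neighbor (up to two hops) of such a file, or was touched by one of the most recent commits (weighted higher when the commit message shares words with the bug)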
119
+ - **Semantic score** — cosine similarity between bug text and function bodies
120
+ - **History boost** — similar past bugs pointed here
121
+
122
+ The index stays fresh via git hooks (`post-commit`, `post-checkout`) and an optional file watcher.
123
+
124
+ ---
125
+
126
+ ## Commands
127
+
128
+ | Command | What it does |
129
+ |---|---|
130
+ | `dug init` | Index the current repo (builds graph + embeddings) |
131
+ | `dug "error text"` | Generate a Claude Code prompt for the bug |
132
+ | `dug update` | Re-index files changed since last commit |
133
+ | `dug watch` | Watch for file saves and re-index in real time |
134
+ | `dug stats` | Show index size (nodes, edges, chunks) |
135
+ | `dug config` | View / edit configuration |
136
+ | `dug feedback good` | Mark last query as helpful (improves future results) |
137
+ | `dug feedback bad` | Mark last query as unhelpful |
138
+
139
+ ### Options
140
+
141
+ ```sh
142
+ dug init --local # Use local embeddings (default, no API key needed)
143
+ dug init --openai # Use OpenAI text-embedding-3-small (needs OPENAI_API_KEY)
144
+ dug "error" --files 3 # Limit to top 3 files in prompt
145
+ dug "error" --no-history # Skip learning loop context
146
+ ```
147
+
148
+ ---
149
+
150
+ ## Configuration
151
+
152
+ `dug init` creates `.dug/config.json` in the repo root. You can also edit it with `dug config set <key> <value>`.
153
+
154
+ ```json
155
+ {
156
+ "embedding_mode": "local",
157
+ "languages": ["python", "java", "typescript", "javascript"],
158
+ "max_files_in_prompt": 5,
159
+ "git_history_depth": 50,
160
+ "exclude_test_files": true
161
+ }
162
+ ```
163
+
164
+ `.dug/` is automatically added to `.gitignore` — it's machine-specific and never committed.
165
+
166
+ ---
167
+
168
+ ## Contributing
169
+
170
+ ```sh
171
+ git clone https://github.com/ratishjain12/dug
172
+ cd dug
173
+ uv sync
174
+ uv run dug init # index the dug repo itself
175
+ uv run dug "your bug here"
176
+ ```
177
+
178
+ Requires Python 3.10+ and [uv](https://docs.astral.sh/uv/).