dug-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dug/graph.py ADDED
@@ -0,0 +1,423 @@
1
+ """Structural knowledge graph — FILE, SYMBOL, and COMMIT nodes with edges."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ import subprocess
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+
11
+ import networkx as nx
12
+
13
+ from .git_context import Commit, get_git_history
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # Language helpers
17
+ # ---------------------------------------------------------------------------
18
+
19
# Language name -> file suffixes (leading dot included) that identify it.
# Insertion order matters: lookups walk the mapping front to back.
LANG_EXTENSIONS: dict[str, list[str]] = dict(
    python=[".py"],
    java=[".java"],
    typescript=[".ts", ".tsx"],
    javascript=[".js", ".jsx"],
)

# Language name -> regexes that flag a source line as an import statement.
IMPORT_PATTERNS: dict[str, list[str]] = {
    "python": [
        r"^import\s+\S+",
        r"^from\s+\S+\s+import",
    ],
    "java": [
        r"^import\s+\S+",
    ],
    "typescript": [
        r"^import\s+",
        r'require\(',
    ],
    "javascript": [
        r"^import\s+",
        r'require\(',
    ],
}
32
+
33
+
34
def _ext_to_lang(ext: str) -> str | None:
    """Return the language whose suffix list contains `ext`, or None.

    `ext` is compared verbatim against the entries of LANG_EXTENSIONS, so it
    must include the leading dot and match case exactly. The first language
    (in mapping order) that lists the suffix wins.
    """
    return next(
        (name for name, suffixes in LANG_EXTENSIONS.items() if ext in suffixes),
        None,
    )
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # Node dataclasses (stored as node attributes in networkx)
43
+ # ---------------------------------------------------------------------------
44
+
45
@dataclass
class FileNode:
    # Plain record describing one source file.
    # Path of the file, relative to repo root.
    path: str
    # Language label for the file.
    language: str
    # Modification time as a float; presumably an mtime in seconds — verify against callers.
    last_modified: float
    # File size; presumably in bytes — verify against callers.
    size: int
51
+
52
+
53
@dataclass
class SymbolNode:
    # Plain record describing one named symbol found in a source file.
    # Identifier of the symbol.
    name: str
    # One of: function / class / method.
    kind: str
    # Path of the file that defines the symbol.
    file_path: str
    # Line on which the symbol was found.
    line_number: int
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Graph builder
63
+ # ---------------------------------------------------------------------------
64
+
65
class CodeGraph:
    """Directed graph of FILE, SYMBOL and COMMIT nodes backed by networkx.

    Node ids are namespaced strings: ``file:<rel path>``,
    ``sym:<rel path>:<name>:<line>`` and ``commit:<hash>``. Every node carries
    a ``kind`` attribute ("FILE", "SYMBOL" or "COMMIT") and every edge a
    ``rel`` attribute ("contains", "imports" or "modified").
    """

    def __init__(self):
        self.g: nx.DiGraph = nx.DiGraph()

    # -- persistence -------------------------------------------------------

    def save(self, path: Path) -> None:
        """Write the graph to `path` as node-link JSON, creating parent dirs."""
        data = nx.node_link_data(self.g)
        path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding: the file must read back identically regardless of
        # the platform's locale default.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def load(self, path: Path) -> None:
        """Replace the current graph with the one stored at `path`.

        Does nothing (keeps the current graph) when `path` does not exist.
        """
        if not path.exists():
            return
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        self.g = nx.node_link_graph(data)

    # -- file nodes --------------------------------------------------------

    def add_file(self, path: Path, root: Path) -> str:
        """Add (or overwrite) the FILE node for `path` and return its node id.

        The node records the path relative to `root`, the language derived
        from the suffix ("unknown" if unrecognised), and the file's mtime and
        size as reported by `stat()`.
        """
        rel = str(path.relative_to(root))
        lang = _ext_to_lang(path.suffix) or "unknown"
        stat = path.stat()
        node_id = f"file:{rel}"
        self.g.add_node(
            node_id,
            kind="FILE",
            path=rel,
            language=lang,
            last_modified=stat.st_mtime,
            size=stat.st_size,
        )
        return node_id

    def file_nodes(self) -> list[dict]:
        """Return one dict per FILE node: its attributes plus an ``id`` key."""
        return [
            {"id": n, **d}
            for n, d in self.g.nodes(data=True)
            if d.get("kind") == "FILE"
        ]

    # -- symbol nodes ------------------------------------------------------

    def add_symbol(self, name: str, kind: str, file_path: str, line: int) -> str:
        """Add a SYMBOL node and return its node id.

        A ``contains`` edge from the owning FILE node is added only when that
        FILE node already exists in the graph.
        """
        node_id = f"sym:{file_path}:{name}:{line}"
        self.g.add_node(
            node_id,
            kind="SYMBOL",
            name=name,
            symbol_kind=kind,
            file_path=file_path,
            line_number=line,
        )
        file_id = f"file:{file_path}"
        if self.g.has_node(file_id):
            self.g.add_edge(file_id, node_id, rel="contains")
        return node_id

    # -- import edges ------------------------------------------------------

    def add_import_edge(self, from_file: str, to_file: str) -> None:
        """Add an ``imports`` edge between two FILE nodes if both exist."""
        src = f"file:{from_file}"
        dst = f"file:{to_file}"
        if self.g.has_node(src) and self.g.has_node(dst):
            self.g.add_edge(src, dst, rel="imports")

    # -- commit nodes ------------------------------------------------------

    def add_commit(self, commit: Commit, root: Path) -> str:
        """Add a COMMIT node and ``modified`` edges to the files it touched.

        Only files that already have a FILE node receive an edge. `root` is
        accepted for interface compatibility and is not used here.
        """
        node_id = f"commit:{commit.hash}"
        self.g.add_node(
            node_id,
            kind="COMMIT",
            hash=commit.hash,
            message=commit.message,
            timestamp=commit.timestamp.isoformat(),
        )
        for rel_path in commit.files_touched:
            file_id = f"file:{rel_path}"
            if self.g.has_node(file_id):
                self.g.add_edge(node_id, file_id, rel="modified")
        return node_id

    # -- lookup helpers ----------------------------------------------------

    def find_file_nodes_for_symbol(self, symbol: str) -> list[str]:
        """Return the distinct FILE node ids of files defining `symbol`.

        Ids are returned in first-seen order; they are derived from the
        symbol's ``file_path`` attribute.
        """
        owners = (
            f"file:{d['file_path']}"
            for _, d in self.g.nodes(data=True)
            if d.get("kind") == "SYMBOL" and d.get("name") == symbol
        )
        # dict.fromkeys de-duplicates while preserving first-seen order.
        return list(dict.fromkeys(owners))

    def get_import_neighbors(self, file_id: str, hops: int = 2) -> dict[str, int]:
        """Return FILE nodes within `hops` edges of `file_id`, with hop distance.

        Edges are followed in both directions, and only neighbours whose
        ``kind`` is "FILE" are visited. The starting file itself is never part
        of the result, and an id that is not in the graph yields ``{}``.
        """
        if not self.g.has_node(file_id):
            return {}

        distances: dict[str, int] = {}
        # Seeding `seen` with the origin stops it being re-discovered through
        # a back edge and reported as its own 2-hop neighbour.
        seen = {file_id}
        frontier = [file_id]
        for hop in range(1, hops + 1):
            next_frontier = []
            for node in frontier:
                adjacent = list(self.g.successors(node)) + list(self.g.predecessors(node))
                for neighbor in adjacent:
                    if neighbor in seen:
                        continue
                    if self.g.nodes[neighbor].get("kind") != "FILE":
                        continue
                    seen.add(neighbor)
                    distances[neighbor] = hop
                    next_frontier.append(neighbor)
            frontier = next_frontier
        return distances

    def stats(self) -> dict:
        """Return node counts per ``kind`` and the total edge count."""
        kinds: dict[str, int] = {}
        for _, d in self.g.nodes(data=True):
            k = d.get("kind", "UNKNOWN")
            kinds[k] = kinds.get(k, 0) + 1
        return {"nodes": dict(kinds), "edges": self.g.number_of_edges()}

    # -- incremental update helpers ----------------------------------------

    def remove_file_data(self, rel_path: str) -> None:
        """Remove FILE node, all its SYMBOL nodes, and all their edges."""
        file_id = f"file:{rel_path}"

        # Collect first: nodes cannot be removed while iterating the view.
        sym_nodes = [
            n for n, d in self.g.nodes(data=True)
            if d.get("kind") == "SYMBOL" and d.get("file_path") == rel_path
        ]
        for sym in sym_nodes:
            self.g.remove_node(sym)

        if self.g.has_node(file_id):
            self.g.remove_node(file_id)  # networkx removes all edges automatically

    def update_file_data(self, file_path: Path, root: Path,
                         all_file_rels: set[str]) -> None:
        """Remove stale data for a file then re-add fresh nodes and edges.

        If the file no longer exists only the removal happens. Files with an
        unrecognised suffix get a FILE node but no symbols or import edges.
        """
        rel = str(file_path.relative_to(root))
        self.remove_file_data(rel)

        if not file_path.exists():
            return

        self.add_file(file_path, root)

        lang = _ext_to_lang(file_path.suffix)
        if not lang:
            return

        for sym in extract_symbols_ripgrep(file_path, root):
            self.add_symbol(sym["name"], sym["kind"], sym["file"], sym["line"])

        for imp in extract_imports(file_path, root, lang):
            target = _resolve_import_to_file(imp, all_file_rels, lang)
            # Skip unresolved imports and self-references.
            if target and target != rel:
                self.add_import_edge(rel, target)

    def prune_stale_nodes(self, root: Path) -> list[str]:
        """Remove FILE nodes whose path no longer exists on disk.

        Returns the relative paths that were removed.
        """
        stale = [
            d["path"]
            for _, d in list(self.g.nodes(data=True))
            if d.get("kind") == "FILE" and not (root / d.get("path", "")).exists()
        ]
        for rel_path in stale:
            self.remove_file_data(rel_path)
        return stale
231
+
232
+
233
+ # ---------------------------------------------------------------------------
234
+ # Walk + symbol extraction
235
+ # ---------------------------------------------------------------------------
236
+
237
+ def _should_ignore(path: Path, ignore_paths: list[str]) -> bool:
238
+ path_str = str(path)
239
+ return any(ig in path.parts or ig in path_str for ig in ignore_paths)
240
+
241
+
242
def walk_repo(root: Path, ignore_paths: list[str], languages: list[str]) -> list[Path]:
    """Return every file under `root` that belongs to one of `languages`.

    A file qualifies when its suffix is listed in LANG_EXTENSIONS for a
    requested language and its path is not ignored. Unknown language names
    contribute no suffixes.

    Ignore entries are checked against the path *relative to `root`*, so a
    directory above the repository that happens to match an entry (for
    example a checkout living under ``/home/me/build/``) no longer causes the
    whole repository to be skipped. The returned paths are the ones yielded
    by ``root.rglob`` (i.e. still prefixed with `root`).
    """
    valid_exts: set[str] = set()
    for lang in languages:
        valid_exts.update(LANG_EXTENSIONS.get(lang, []))

    files = []
    for p in root.rglob("*"):
        if p.suffix not in valid_exts or not p.is_file():
            continue
        if _should_ignore(p.relative_to(root), ignore_paths):
            continue
        files.append(p)
    return files
252
+
253
+
254
def extract_symbols_ctags(root: Path) -> list[dict]:
    """Run universal-ctags over `root` and parse its JSON-lines output.

    Returns a list of ``{"name", "kind", "file", "line"}`` dicts, one per
    ctags entry whose ``_type`` is "tag". Returns ``[]`` when the ``ctags``
    executable (or `root`) cannot be found, or when the run exceeds the
    10-second timeout. Lines that are not valid JSON objects are skipped.

    ``file`` is normalised to be relative to `root` without a leading "./":
    ctags is invoked on "." and may echo that prefix back on every path,
    which would stop the value matching repo-relative paths elsewhere.
    """
    try:
        result = subprocess.run(
            ["ctags", "-R", "--output-format=json", "--fields=+n", "."],
            capture_output=True,
            text=True,
            # Decode leniently: a strict locale decode would raise on any
            # symbol text that is not valid in the current locale encoding.
            encoding="utf-8",
            errors="replace",
            cwd=root,
            timeout=10,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return []

    symbols = []
    for raw in result.stdout.splitlines():
        raw = raw.strip()
        if not raw:
            continue
        try:
            obj = json.loads(raw)
        except json.JSONDecodeError:
            continue
        # Guard against JSON scalars/arrays, which have no .get().
        if not isinstance(obj, dict) or obj.get("_type") != "tag":
            continue
        path = obj.get("path", "")
        if path.startswith("./"):
            path = path[2:]
        symbols.append({
            "name": obj.get("name", ""),
            "kind": obj.get("kind", "unknown"),
            "file": path,
            "line": obj.get("line", 0),
        })
    return symbols
285
+
286
+
287
def extract_symbols_ripgrep(file_path: Path, root: Path) -> list[dict]:
    """Fallback symbol extraction using ripgrep patterns.

    Runs ``rg`` once per pattern against `file_path` and returns a list of
    ``{"name", "kind", "file", "line"}`` dicts, where ``file`` is the path
    relative to `root`. Results are grouped by pattern, not sorted by line.
    Returns whatever was collected so far (normally ``[]``) when the ``rg``
    executable is not installed.

    Output is decoded as UTF-8 with replacement characters so that a source
    file in another encoding cannot abort the scan with UnicodeDecodeError.
    """
    patterns = [
        (r"^def ([A-Za-z_]\w*)\s*\(", "function"),
        (r"^class ([A-Za-z_]\w*)\s*[:(]", "class"),
        (r"^function ([A-Za-z_]\w*)\s*\(", "function"),
        (r"^\s+(?:public|private|protected)\s+\w+\s+([A-Za-z_]\w*)\s*\(", "method"),
    ]
    rel = str(file_path.relative_to(root))
    symbols = []
    for pattern, kind in patterns:
        try:
            result = subprocess.run(
                # --no-filename pins the output to "<line>:<text>" so the
                # parsing below never has to cope with a path prefix.
                ["rg", "--line-number", "--no-heading", "--no-filename",
                 pattern, str(file_path)],
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
            )
        except FileNotFoundError:
            # ripgrep is not installed; further patterns would fail the same way.
            break

        matcher = re.compile(pattern)
        for line in result.stdout.splitlines():
            lineno_text, sep, content = line.partition(":")
            if not sep:
                continue
            try:
                lineno = int(lineno_text)
            except ValueError:
                continue
            # Re-apply the pattern in Python to pull out the captured name.
            match = matcher.search(content)
            if match:
                symbols.append({
                    "name": match.group(1),
                    "kind": kind,
                    "file": rel,
                    "line": lineno,
                })
    return symbols
324
+
325
+
326
def extract_imports(file_path: Path, root: Path, language: str) -> list[str]:
    """Return list of imported module/file strings found in `file_path`.

    Each element is a whole source line (stripped) that matched one of the
    IMPORT_PATTERNS for `language`; a line matching several patterns appears
    once per pattern. Unknown languages yield ``[]``, as does a missing
    ``rg`` executable. `root` is accepted for interface symmetry and unused.

    Output is decoded as UTF-8 with replacement characters so that a source
    file in another encoding cannot abort the scan with UnicodeDecodeError.
    """
    imports = []
    for pattern in IMPORT_PATTERNS.get(language, []):
        try:
            result = subprocess.run(
                # --no-filename guarantees bare source lines in the output.
                ["rg", "--no-heading", "--no-line-number", "--no-filename",
                 pattern, str(file_path)],
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
            )
        except FileNotFoundError:
            # ripgrep is not installed; further patterns would fail the same way.
            break
        imports.extend(line.strip() for line in result.stdout.splitlines())
    return imports
342
+
343
+
344
+ def _resolve_import_to_file(import_line: str, all_file_rels: set[str], language: str) -> str | None:
345
+ """Best-effort: map an import statement to a file node path."""
346
+ # Python: "from dug.graph import CodeGraph" → look for dug/graph.py
347
+ # Java: "import com.example.Foo" → com/example/Foo.java
348
+ # JS/TS: "import ... from './utils'" → utils.ts / utils.js
349
+ if language == "python":
350
+ m = re.search(r"^from ([\w.]+) import|^import ([\w.]+)", import_line)
351
+ if m:
352
+ mod = (m.group(1) or m.group(2)).replace(".", "/")
353
+ for ext in [".py"]:
354
+ candidate = mod + ext
355
+ if candidate in all_file_rels:
356
+ return candidate
357
+ elif language in ("typescript", "javascript"):
358
+ m = re.search(r"""from\s+['"]([^'"]+)['"]""", import_line)
359
+ if m:
360
+ raw = m.group(1)
361
+ for ext in [".ts", ".tsx", ".js", ".jsx"]:
362
+ candidate = raw.lstrip("./") + ext
363
+ for f in all_file_rels:
364
+ if f.endswith(candidate):
365
+ return f
366
+ elif language == "java":
367
+ m = re.search(r"^import\s+([\w.]+);", import_line)
368
+ if m:
369
+ candidate = m.group(1).replace(".", "/") + ".java"
370
+ if candidate in all_file_rels:
371
+ return candidate
372
+ return None
373
+
374
+
375
+ # ---------------------------------------------------------------------------
376
+ # Public build function
377
+ # ---------------------------------------------------------------------------
378
+
379
def build_graph(root: Path, config: dict) -> CodeGraph:
    """Build a fresh CodeGraph for the repository at `root`.

    Reads ``ignore_paths``, ``languages`` and ``git_history_depth`` (default
    50) from `config`, then adds, in order: FILE nodes, SYMBOL nodes,
    FILE->FILE import edges and COMMIT nodes.
    """
    graph = CodeGraph()
    source_files = walk_repo(
        root,
        config.get("ignore_paths", []),
        config.get("languages", []),
    )
    history_depth = config.get("git_history_depth", 50)

    # FILE nodes
    for source in source_files:
        graph.add_file(source, root)

    all_file_rels: set[str] = {str(source.relative_to(root)) for source in source_files}

    # SYMBOL nodes: ctags output is used when it produced anything at all;
    # only an empty ctags result triggers the per-file ripgrep fallback.
    ctags_symbols = extract_symbols_ctags(root)
    if not ctags_symbols:
        for source in source_files:
            if not _ext_to_lang(source.suffix):
                continue
            for sym in extract_symbols_ripgrep(source, root):
                graph.add_symbol(sym["name"], sym["kind"], sym["file"], sym["line"])
    else:
        for sym in ctags_symbols:
            owner = sym["file"]
            # Ignore tags for files that were filtered out of the walk.
            if owner in all_file_rels:
                graph.add_symbol(sym["name"], sym["kind"], owner, sym["line"])

    # FILE→FILE import edges
    for source in source_files:
        lang = _ext_to_lang(source.suffix)
        if not lang:
            continue
        rel = str(source.relative_to(root))
        for imp in extract_imports(source, root, lang):
            target = _resolve_import_to_file(imp, all_file_rels, lang)
            # Skip unresolved imports and self-references.
            if target and target != rel:
                graph.add_import_edge(rel, target)

    # COMMIT nodes
    for commit in get_git_history(root, depth=history_depth):
        graph.add_commit(commit, root)

    return graph
dug/history.py ADDED
@@ -0,0 +1,231 @@
1
+ """Learning loop — stores past bug→file resolutions and boosts similar future queries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ from datetime import datetime, timezone
8
+ from difflib import SequenceMatcher
9
+ from pathlib import Path
10
+
11
+ from .config import get_dug_dir
12
+
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Persistence
16
+ # ---------------------------------------------------------------------------
17
+
18
def get_history_path() -> Path:
    """Return the path of ``history.json`` inside the dug directory."""
    dug_dir = get_dug_dir()
    return dug_dir / "history.json"
20
+
21
+
22
def get_last_query_path() -> Path:
    """Return the path of ``last_query.json`` inside the dug directory."""
    dug_dir = get_dug_dir()
    return dug_dir / "last_query.json"
24
+
25
+
26
def load_history() -> list[dict]:
    """Return the parsed contents of the history file.

    Yields an empty list when the history file does not exist yet.
    """
    history_file = get_history_path()
    if history_file.exists():
        with open(history_file) as fh:
            return json.load(fh)
    return []
32
+
33
+
34
def save_history(entries: list[dict]) -> None:
    """Overwrite the history file with `entries` as indented JSON.

    The parent directory is created first if it is missing.
    """
    history_file = get_history_path()
    history_file.parent.mkdir(parents=True, exist_ok=True)
    with open(history_file, "w") as fh:
        json.dump(entries, fh, indent=2)
39
+
40
+
41
def save_last_query(bug_input: str, ranked_file_paths: list[str], signals: dict) -> None:
    """Persist the most recent query to the last-query file.

    Stores the bug text, the ranked file paths, the signals dict and a UTC
    ISO-8601 timestamp, overwriting any previous record.
    """
    target = get_last_query_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    record = {
        "bug_input": bug_input,
        "ranked_files": ranked_file_paths,
        "signals": signals,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    with open(target, "w") as fh:
        json.dump(record, fh, indent=2)
51
+
52
+
53
def load_last_query() -> dict | None:
    """Return the stored last-query record, or None if none has been saved."""
    source = get_last_query_path()
    if source.exists():
        with open(source) as fh:
            return json.load(fh)
    return None
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Record a resolved bug
63
+ # ---------------------------------------------------------------------------
64
+
65
def record_resolved(bug_input: str, resolved_files: list[str], signals: dict) -> None:
    """Append a resolved bug entry to history.

    Entries are keyed by the MD5 hex digest of `bug_input`. If an entry with
    the same id already exists it is updated in place: the new files are
    merged into ``resolved_files`` (order kept, duplicates dropped),
    ``solve_count`` is incremented and ``last_solved`` refreshed. Otherwise a
    new entry is appended. The history file is rewritten in both cases.
    """
    entries = load_history()
    entry_id = hashlib.md5(bug_input.encode()).hexdigest()

    # Update existing entry if same bug was solved before
    existing = next((item for item in entries if item["id"] == entry_id), None)
    if existing is None:
        entries.append({
            "id": entry_id,
            "bug_input": bug_input,
            "error_type": signals.get("error_type"),
            "signals": {
                "files": signals.get("files", []),
                "symbols": signals.get("symbols", []),
            },
            "resolved_files": resolved_files,
            "solve_count": 1,
            "last_solved": datetime.now(timezone.utc).isoformat(),
        })
    else:
        merged = existing["resolved_files"] + resolved_files
        # dict.fromkeys drops duplicates while keeping first-seen order.
        existing["resolved_files"] = list(dict.fromkeys(merged))
        existing["solve_count"] = existing.get("solve_count", 1) + 1
        existing["last_solved"] = datetime.now(timezone.utc).isoformat()

    save_history(entries)
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Similarity matching
98
+ # ---------------------------------------------------------------------------
99
+
100
+ _STOPWORDS = {"the", "a", "an", "in", "at", "on", "is", "was", "with",
101
+ "and", "or", "for", "to", "of", "from", "that", "this",
102
+ "it", "not", "by", "be", "are", "has", "have", "had"}
103
+
104
+
105
def _word_tokens(text: str) -> set[str]:
    """
    Return the significant lowercase words of `text`.

    CamelCase and snake_case identifiers are split into their parts; words
    of three letters or fewer and stopwords are dropped.
    'NullPointerException' → {'null', 'pointer', 'exception'}
    'load_config' → {'load', 'config'}
    """
    import re

    # Insert a space at every lower→upper boundary so CamelCase splits apart.
    spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', text)

    tokens = set()
    # Any run of letters is a word; everything else acts as a separator.
    for word in re.findall(r'[a-zA-Z]+', spaced.lower()):
        if len(word) <= 3 or word in _STOPWORDS:
            continue
        tokens.add(word)
    return tokens
117
+
118
+
119
def _text_similarity(a: str, b: str) -> float:
    """Score how alike two strings are.

    The result is 40% case-insensitive SequenceMatcher ratio plus 60%
    Jaccard overlap of the significant word sets (0.0 when both are empty).
    """
    tokens_a = _word_tokens(a)
    tokens_b = _word_tokens(b)
    combined = tokens_a | tokens_b
    if combined:
        word_sim = len(tokens_a & tokens_b) / len(combined)
    else:
        word_sim = 0.0

    char_sim = SequenceMatcher(None, a.lower(), b.lower()).ratio()
    return char_sim * 0.4 + word_sim * 0.6
129
+
130
+
131
+ def _signals_overlap(signals_a: dict, signals_b: dict) -> float:
132
+ """Fraction of shared files/symbols between two signal dicts (0–1)."""
133
+ files_a = set(signals_a.get("files", []))
134
+ files_b = set(signals_b.get("files", []))
135
+ syms_a = set(signals_a.get("symbols", []))
136
+ syms_b = set(signals_b.get("symbols", []))
137
+
138
+ total = len(files_a | files_b) + len(syms_a | syms_b)
139
+ if total == 0:
140
+ return 0.0
141
+ shared = len(files_a & files_b) + len(syms_a & syms_b)
142
+ return shared / total
143
+
144
+
145
def find_similar_past_bugs(
    bug_input: str,
    signals: dict,
    threshold: float = 0.35,
) -> list[dict]:
    """
    Return past entries that are similar to the current bug.

    Each history entry is scored as
    ``text similarity * 0.6 + signal overlap * 0.25 + 0.2 if the error types
    match (case-insensitively)``. Entries scoring at least `threshold` are
    returned as copies with an added ``_similarity`` key (rounded to three
    decimals), best match first.
    """
    wanted_error = (signals.get("error_type") or "").lower()

    matches = []
    for past in load_history():
        text_score = _text_similarity(bug_input, past["bug_input"])

        # Error type exact match gives a strong boost
        past_error = (past.get("error_type") or "").lower()
        if wanted_error and wanted_error == past_error:
            error_bonus = 0.2
        else:
            error_bonus = 0.0

        overlap = _signals_overlap(signals, past.get("signals", {}))

        score = text_score * 0.6 + overlap * 0.25 + error_bonus
        if score < threshold:
            continue
        matches.append({**past, "_similarity": round(score, 3)})

    matches.sort(key=lambda item: item["_similarity"], reverse=True)
    return matches
174
+
175
+
176
+ # ---------------------------------------------------------------------------
177
+ # Scoring boost
178
+ # ---------------------------------------------------------------------------
179
+
180
def get_history_boost(
    bug_input: str,
    signals: dict,
    candidate_files: list[str],
) -> dict[str, float]:
    """
    Return {file_path: boost_score} for files that resolved similar past bugs.
    Boost is +6, scaled by similarity (so a 0.9-similar past bug gives +5.4).

    Only files present in `candidate_files` are boosted. When a file resolved
    several similar bugs, the largest boost is kept (boosts do not add up).
    History entries without a ``resolved_files`` key are skipped rather than
    raising.
    """
    # Build the membership set once instead of scanning the list per file.
    candidates = set(candidate_files)
    boosts: dict[str, float] = {}
    for past in find_similar_past_bugs(bug_input, signals):
        pts = 6.0 * past["_similarity"]
        for filepath in past.get("resolved_files", []):
            if filepath in candidates:
                boosts[filepath] = max(boosts.get(filepath, 0.0), pts)
    return boosts
198
+
199
+
200
+ # ---------------------------------------------------------------------------
201
+ # Error pattern library
202
+ # ---------------------------------------------------------------------------
203
+
204
def get_error_pattern_boost(
    error_type: str | None,
    candidate_files: list[str],
) -> dict[str, float]:
    """
    Boost files that have historically appeared alongside a specific error type.
    Derived entirely from accumulated history — no hardcoded rules.

    Counts, per candidate file, how many history entries with the same error
    type (case-insensitive) list it as resolved, then scales the counts so
    the most frequent file gets 3.0. Returns {} when `error_type` is empty or
    no candidate file has ever been recorded for it.
    """
    if not error_type:
        return {}

    wanted = error_type.lower()
    frequency: dict[str, int] = {}
    for past in load_history():
        if (past.get("error_type") or "").lower() != wanted:
            continue
        for fp in past.get("resolved_files", []):
            if fp in candidate_files:
                frequency[fp] = frequency.get(fp, 0) + 1

    if not frequency:
        return {}

    # Normalize to 0–3 boost range
    max_freq = max(frequency.values())
    return {fp: (count / max_freq) * 3.0 for fp, count in frequency.items()}