codegraph_cli-2.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. codegraph_cli/__init__.py +4 -0
  2. codegraph_cli/agents.py +191 -0
  3. codegraph_cli/bug_detector.py +386 -0
  4. codegraph_cli/chat_agent.py +352 -0
  5. codegraph_cli/chat_session.py +220 -0
  6. codegraph_cli/cli.py +330 -0
  7. codegraph_cli/cli_chat.py +367 -0
  8. codegraph_cli/cli_diagnose.py +133 -0
  9. codegraph_cli/cli_refactor.py +230 -0
  10. codegraph_cli/cli_setup.py +470 -0
  11. codegraph_cli/cli_test.py +177 -0
  12. codegraph_cli/cli_v2.py +267 -0
  13. codegraph_cli/codegen_agent.py +265 -0
  14. codegraph_cli/config.py +31 -0
  15. codegraph_cli/config_manager.py +341 -0
  16. codegraph_cli/context_manager.py +500 -0
  17. codegraph_cli/crew_agents.py +123 -0
  18. codegraph_cli/crew_chat.py +159 -0
  19. codegraph_cli/crew_tools.py +497 -0
  20. codegraph_cli/diff_engine.py +265 -0
  21. codegraph_cli/embeddings.py +241 -0
  22. codegraph_cli/graph_export.py +144 -0
  23. codegraph_cli/llm.py +642 -0
  24. codegraph_cli/models.py +47 -0
  25. codegraph_cli/models_v2.py +185 -0
  26. codegraph_cli/orchestrator.py +49 -0
  27. codegraph_cli/parser.py +800 -0
  28. codegraph_cli/performance_analyzer.py +223 -0
  29. codegraph_cli/project_context.py +230 -0
  30. codegraph_cli/rag.py +200 -0
  31. codegraph_cli/refactor_agent.py +452 -0
  32. codegraph_cli/security_scanner.py +366 -0
  33. codegraph_cli/storage.py +390 -0
  34. codegraph_cli/templates/graph_interactive.html +257 -0
  35. codegraph_cli/testgen_agent.py +316 -0
  36. codegraph_cli/validation_engine.py +285 -0
  37. codegraph_cli/vector_store.py +293 -0
  38. codegraph_cli-2.0.0.dist-info/METADATA +318 -0
  39. codegraph_cli-2.0.0.dist-info/RECORD +43 -0
  40. codegraph_cli-2.0.0.dist-info/WHEEL +5 -0
  41. codegraph_cli-2.0.0.dist-info/entry_points.txt +2 -0
  42. codegraph_cli-2.0.0.dist-info/licenses/LICENSE +21 -0
  43. codegraph_cli-2.0.0.dist-info/top_level.txt +1 -0
codegraph_cli/performance_analyzer.py ADDED
@@ -0,0 +1,223 @@
+ """Performance issue analyzer."""
+
+ from __future__ import annotations
+
+ import ast
+ from typing import Dict, List
+
+ from .storage import GraphStore
+
+
+ class PerformanceAnalyzer:
+     """Analyze code for performance issues."""
+
+     def __init__(self, store: GraphStore):
+         self.store = store
+
+         # Database-related method names that indicate queries
+         self.query_methods = {
+             "execute", "executemany", "query", "get", "filter",
+             "all", "first", "one", "fetch", "fetchone", "fetchall",
+             "select", "insert", "update", "delete"
+         }
+
+     def analyze_file(self, file_path: str) -> List[Dict]:
+         """Analyze file for performance issues.
+
+         Args:
+             file_path: Path to file to analyze
+
+         Returns:
+             List of performance issue dictionaries
+         """
+         issues = []
+
+         nodes = [n for n in self.store.get_nodes() if n["file_path"] == file_path]
+         # Skip module-level node when function/class nodes exist to avoid dupes
+         has_children = any(n["node_type"] != "module" for n in nodes)
+         if has_children:
+             nodes = [n for n in nodes if n["node_type"] != "module"]
+
+         seen: set = set()  # (line, type) dedup
+
+         for node in nodes:
+             try:
+                 tree = ast.parse(node["code"])
+             except SyntaxError:
+                 continue
+
+             for issue in (
+                 self._detect_n_plus_one(tree, node)
+                 + self._detect_inefficient_algorithms(tree, node)
+                 + self._detect_memory_issues(tree, node)
+             ):
+                 key = (issue["line"], issue["type"])
+                 if key not in seen:
+                     seen.add(key)
+                     issues.append(issue)
+
+         return issues
+
+     def _detect_n_plus_one(self, tree: ast.AST, node: Dict) -> List[Dict]:
+         """Detect N+1 query patterns."""
+         issues = []
+
+         for ast_node in ast.walk(tree):
+             # Look for loops with database queries inside
+             if isinstance(ast_node, (ast.For, ast.While)):
+                 # Check for query calls inside the loop
+                 for inner_node in ast.walk(ast_node):
+                     if isinstance(inner_node, ast.Call):
+                         # Check for query-like method names
+                         if isinstance(inner_node.func, ast.Attribute):
+                             if inner_node.func.attr in self.query_methods:
+                                 issues.append({
+                                     "type": "n_plus_one_query",
+                                     "severity": "high",
+                                     "line": node["start_line"] + inner_node.lineno - 1,
+                                     "message": "Potential N+1 query pattern: database query inside loop",
+                                     "suggestion": "Use bulk queries, eager loading, or prefetch_related()",
+                                     "code_snippet": ast.unparse(inner_node)[:100]
+                                 })
+                                 break  # Only report once per loop
+
+         return issues
+
+     def _detect_inefficient_algorithms(self, tree: ast.AST, node: Dict) -> List[Dict]:
+         """Detect inefficient algorithm patterns."""
+         issues = []
+
+         for ast_node in ast.walk(tree):
+             if isinstance(ast_node, (ast.For, ast.While)):
+                 # Check for nested loops (O(n²) complexity)
+                 nested_loops = []
+                 for inner in ast.walk(ast_node):
+                     if inner != ast_node and isinstance(inner, (ast.For, ast.While)):
+                         nested_loops.append(inner)
+
+                 if nested_loops:
+                     # Report the first nested loop found (ast.walk is
+                     # breadth-first, so this is the shallowest nested loop)
+                     first_nested = nested_loops[0]
+                     issues.append({
+                         "type": "nested_loop",
+                         "severity": "medium",
+                         "line": node["start_line"] + first_nested.lineno - 1,
+                         "message": "Nested loop detected (O(n²) complexity)",
+                         "suggestion": "Consider using hash maps, sets, or optimizing the algorithm",
+                         "code_snippet": "for ... in ...: for ... in ..."
+                     })
+
+             # Detect inefficient list operations in loops
+             if isinstance(ast_node, ast.For):
+                 for inner in ast.walk(ast_node):
+                     if isinstance(inner, ast.Call):
+                         if isinstance(inner.func, ast.Attribute):
+                             # list.append() feeding a later ''.join() is
+                             # efficient, so don't flag it
+                             if inner.func.attr == "append":
+                                 pass
+
+                             # Detect list.insert(0, ...) which is O(n)
+                             elif inner.func.attr == "insert":
+                                 if inner.args and isinstance(inner.args[0], ast.Constant):
+                                     if inner.args[0].value == 0:
+                                         issues.append({
+                                             "type": "inefficient_operation",
+                                             "severity": "medium",
+                                             "line": node["start_line"] + inner.lineno - 1,
+                                             "message": "list.insert(0, ...) in loop is O(n²)",
+                                             "suggestion": "Use collections.deque or append and reverse",
+                                             "code_snippet": ast.unparse(inner)[:100]
+                                         })
+
+         # Detect string concatenation in loops
+         for ast_node in ast.walk(tree):
+             if isinstance(ast_node, (ast.For, ast.While)):
+                 for inner in ast.walk(ast_node):
+                     # Look for += on names that are likely strings
+                     if isinstance(inner, ast.AugAssign):
+                         if isinstance(inner.op, ast.Add):
+                             if isinstance(inner.target, ast.Name):
+                                 issues.append({
+                                     "type": "string_concatenation_loop",
+                                     "severity": "low",
+                                     "line": node["start_line"] + inner.lineno - 1,
+                                     "message": "String concatenation in loop (inefficient)",
+                                     "suggestion": "Use list.append() and ''.join() instead",
+                                     "code_snippet": ast.unparse(inner)[:100]
+                                 })
+
+         return issues
+
+     def _detect_memory_issues(self, tree: ast.AST, node: Dict) -> List[Dict]:
+         """Detect memory inefficiencies."""
+         issues = []
+         seen_lines: set = set()
+
+         for ast_node in ast.walk(tree):
+             report_line = node["start_line"] + ast_node.lineno - 1 if hasattr(ast_node, "lineno") else None
+
+             # Detect list comprehensions that could be generator expressions
+             if isinstance(ast_node, ast.ListComp):
+                 if report_line and report_line not in seen_lines:
+                     seen_lines.add(report_line)
+                     issues.append({
+                         "type": "memory_inefficiency",
+                         "severity": "low",
+                         "line": report_line,
+                         "message": "List comprehension could be a generator expression",
+                         "suggestion": "Use (...) instead of [...] if you only iterate once",
+                         "code_snippet": ast.unparse(ast_node)[:100]
+                     })
+
+             # Detect reading an entire file into memory
+             if isinstance(ast_node, ast.Call):
+                 if isinstance(ast_node.func, ast.Attribute):
+                     # file.read() without a size limit
+                     if ast_node.func.attr == "read" and not ast_node.args:
+                         issues.append({
+                             "type": "memory_inefficiency",
+                             "severity": "medium",
+                             "line": node["start_line"] + ast_node.lineno - 1,
+                             "message": "Reading entire file into memory",
+                             "suggestion": "Read in chunks or iterate line by line",
+                             "code_snippet": ast.unparse(ast_node)[:100]
+                         })
+
+                     # .readlines() also loads the entire file
+                     elif ast_node.func.attr == "readlines":
+                         issues.append({
+                             "type": "memory_inefficiency",
+                             "severity": "medium",
+                             "line": node["start_line"] + ast_node.lineno - 1,
+                             "message": "readlines() loads entire file into memory",
+                             "suggestion": "Iterate over file object directly: for line in file:",
+                             "code_snippet": ast.unparse(ast_node)[:100]
+                         })
+
+         return issues
+
+     def analyze_project(self) -> Dict[str, List[Dict]]:
+         """Analyze entire project for performance issues.
+
+         Returns:
+             Dictionary mapping file paths to lists of performance issues
+         """
+         results = {}
+
+         # Get all unique file paths
+         all_nodes = self.store.get_nodes()
+         file_paths = set(node["file_path"] for node in all_nodes)
+
+         for file_path in file_paths:
+             issues = self.analyze_file(file_path)
+             if issues:
+                 results[file_path] = issues
+
+         return results
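The analyzer walks stored graph nodes and pattern-matches on their ASTs. A minimal usage sketch follows, assuming a project has already been indexed so that `GraphStore` can be opened on its project directory; the `~/.codegraph/projects/myproject` path is hypothetical, and `GraphStore(project_dir)` mirrors the constructor call visible in project_context.py below.

```python
from pathlib import Path

from codegraph_cli.performance_analyzer import PerformanceAnalyzer
from codegraph_cli.storage import GraphStore

# Hypothetical project directory; the real layout depends on ProjectManager.
store = GraphStore(Path.home() / ".codegraph" / "projects" / "myproject")
analyzer = PerformanceAnalyzer(store)

# analyze_project() maps file paths to the issue dicts built by the detectors.
for file_path, issues in analyzer.analyze_project().items():
    for issue in issues:
        print(f"{file_path}:{issue['line']} "
              f"[{issue['severity']}] {issue['type']}: {issue['message']}")
```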
codegraph_cli/project_context.py ADDED
@@ -0,0 +1,230 @@
+ """Project context manager with real file system access."""
+
+ from __future__ import annotations
+
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Dict, List, Optional
+
+ from .storage import GraphStore, ProjectManager
+
+
+ class ProjectContext:
+     """Unified context for a project, combining real file access with the code graph."""
+
+     def __init__(self, project_name: str, project_manager: ProjectManager):
+         """Initialize project context.
+
+         Args:
+             project_name: Name of the project
+             project_manager: ProjectManager instance
+         """
+         self.project_name = project_name
+         self.project_manager = project_manager
+         self.project_dir = project_manager.project_dir(project_name)
+         self.store = GraphStore(self.project_dir)
+         self.metadata = self.store.get_metadata()
+
+         # Get source path from metadata
+         source_path_str = self.metadata.get("source_path")
+         if source_path_str:
+             self.source_path = Path(source_path_str)
+             if not self.source_path.exists():
+                 # Source path moved or deleted
+                 self.source_path = None
+         else:
+             # Old project without source_path
+             self.source_path = None
+
+     @property
+     def has_source_access(self) -> bool:
+         """Check if we have access to the original source directory."""
+         return self.source_path is not None and self.source_path.exists()
+
+     # File System Operations
+
+     def list_directory(self, rel_path: str = ".") -> List[Dict]:
+         """List files and directories in the project.
+
+         Args:
+             rel_path: Relative path from project root
+
+         Returns:
+             List of file/directory info dicts
+
+         Raises:
+             RuntimeError: If source path not available
+         """
+         if not self.has_source_access:
+             raise RuntimeError(
+                 f"Source path not available for project '{self.project_name}'. "
+                 "Re-index the project to enable file access."
+             )
+
+         full_path = self.source_path / rel_path
+         if not full_path.exists():
+             return []
+
+         if not full_path.is_dir():
+             raise ValueError(f"Path is not a directory: {rel_path}")
+
+         items = []
+         for item in sorted(full_path.iterdir()):
+             try:
+                 stat = item.stat()
+                 items.append({
+                     "name": item.name,
+                     "type": "dir" if item.is_dir() else "file",
+                     "size": stat.st_size if item.is_file() else None,
+                     "path": str(item.relative_to(self.source_path)),
+                     "modified": datetime.fromtimestamp(stat.st_mtime).isoformat()
+                 })
+             except (OSError, PermissionError):
+                 # Skip files we can't access
+                 continue
+
+         return items
+
+     def read_file(self, rel_path: str) -> str:
+         """Read file contents from the project.
+
+         Args:
+             rel_path: Relative path to file
+
+         Returns:
+             File contents as string
+
+         Raises:
+             RuntimeError: If source path not available
+             FileNotFoundError: If file doesn't exist
+         """
+         if not self.has_source_access:
+             raise RuntimeError(
+                 f"Source path not available for project '{self.project_name}'. "
+                 "Re-index the project to enable file access."
+             )
+
+         full_path = self.source_path / rel_path
+         if not full_path.exists():
+             raise FileNotFoundError(f"File not found: {rel_path}")
+
+         if not full_path.is_file():
+             raise ValueError(f"Path is not a file: {rel_path}")
+
+         try:
+             return full_path.read_text(encoding="utf-8")
+         except UnicodeDecodeError:
+             # Non-text file: return a placeholder rather than raw bytes
+             return f"[Binary file: {full_path.suffix}]"
+
+     def write_file(self, rel_path: str, content: str, create_dirs: bool = True) -> bool:
+         """Write or create a file in the project.
+
+         Args:
+             rel_path: Relative path to file
+             content: File content
+             create_dirs: Whether to create parent directories
+
+         Returns:
+             True if successful
+
+         Raises:
+             RuntimeError: If source path not available
+         """
+         if not self.has_source_access:
+             raise RuntimeError(
+                 f"Source path not available for project '{self.project_name}'. "
+                 "Re-index the project to enable file access."
+             )
+
+         full_path = self.source_path / rel_path
+
+         if create_dirs:
+             full_path.parent.mkdir(parents=True, exist_ok=True)
+
+         full_path.write_text(content, encoding="utf-8")
+         return True
+
+     def file_exists(self, rel_path: str) -> bool:
+         """Check if a file exists in the project.
+
+         Args:
+             rel_path: Relative path to file
+
+         Returns:
+             True if file exists
+         """
+         if not self.has_source_access:
+             return False
+
+         return (self.source_path / rel_path).exists()
+
+     def get_file_info(self, rel_path: str) -> Optional[Dict]:
+         """Get information about a file.
+
+         Args:
+             rel_path: Relative path to file
+
+         Returns:
+             File info dict or None if not found
+         """
+         if not self.has_source_access:
+             return None
+
+         full_path = self.source_path / rel_path
+         if not full_path.exists():
+             return None
+
+         try:
+             stat = full_path.stat()
+             return {
+                 "name": full_path.name,
+                 "path": rel_path,
+                 "type": "dir" if full_path.is_dir() else "file",
+                 "size": stat.st_size if full_path.is_file() else None,
+                 "modified": datetime.fromtimestamp(stat.st_mtime).isoformat(),
+                 "created": datetime.fromtimestamp(stat.st_ctime).isoformat()
+             }
+         except (OSError, PermissionError):
+             return None
+
+     # Code Graph Operations
+
+     def get_indexed_files(self) -> List[str]:
+         """Get list of indexed Python files.
+
+         Returns:
+             List of file paths that were indexed
+         """
+         nodes = self.store.get_nodes()
+         return sorted(set(node["file_path"] for node in nodes))
+
+     def get_project_summary(self) -> Dict:
+         """Get summary of the project.
+
+         Returns:
+             Dictionary with project statistics
+         """
+         nodes = self.store.get_nodes()
+         edges = self.store.get_edges()
+
+         # Count nodes by type
+         node_types = {}
+         for node in nodes:
+             node_type = node["node_type"]
+             node_types[node_type] = node_types.get(node_type, 0) + 1
+
+         return {
+             "project_name": self.project_name,
+             "source_path": str(self.source_path) if self.source_path else None,
+             "has_source_access": self.has_source_access,
+             "indexed_at": self.metadata.get("indexed_at"),
+             "total_nodes": len(nodes),
+             "total_edges": len(edges),
+             "node_types": node_types,
+             "indexed_files": len(self.get_indexed_files())
+         }
+
+     def close(self):
+         """Close the graph store connection."""
+         self.store.close()
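A short sketch of driving `ProjectContext`. `ProjectManager`'s constructor is not shown in this diff, so the zero-argument call below is an assumption; the method calls mirror the API defined above.

```python
from codegraph_cli.project_context import ProjectContext
from codegraph_cli.storage import ProjectManager

ctx = ProjectContext("myproject", ProjectManager())  # ProjectManager() args assumed

# Graph-level statistics come from SQLite; file access needs source_path.
print(ctx.get_project_summary())

if ctx.has_source_access:
    for entry in ctx.list_directory("."):
        print(entry["type"], entry["path"])
    if ctx.file_exists("README.md"):
        print(ctx.read_file("README.md")[:200])

ctx.close()
```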
codegraph_cli/rag.py ADDED
@@ -0,0 +1,200 @@
+ """Retrieval-augmented components for semantic code search.
+
+ Uses LanceDB hybrid search (vector + metadata filters) for fast,
+ accurate code retrieval. Falls back to brute-force cosine similarity
+ when the vector store is unavailable.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ from typing import Any, Dict, List, Optional, Union
+
+ from .embeddings import HashEmbeddingModel, NeuralEmbedder, cosine_similarity
+ from .models import SearchResult
+ from .storage import GraphStore
+
+ logger = logging.getLogger(__name__)
+
+
+ class RAGRetriever:
+     """Retrieve relevant code nodes from graph memory via semantic similarity.
+
+     Supports two modes:
+
+     1. **Vector store mode** (fast, preferred) – delegates to LanceDB via
+        ``GraphStore.vector_store``.
+     2. **Brute-force mode** (fallback) – scans all SQLite rows and computes
+        cosine similarity in Python.
+
+     The ``embedding_model`` argument accepts either a
+     :class:`~codegraph_cli.embeddings.NeuralEmbedder` or the legacy
+     :class:`~codegraph_cli.embeddings.HashEmbeddingModel`.
+     """
+
+     def __init__(
+         self,
+         store: GraphStore,
+         embedding_model: Union[NeuralEmbedder, HashEmbeddingModel, Any],
+     ) -> None:
+         self.store = store
+         self.embedding_model = embedding_model
+         self.use_vector_store: bool = store.vector_store is not None
+
+     # ------------------------------------------------------------------
+     # Primary search
+     # ------------------------------------------------------------------
+
+     def search(
+         self,
+         query: str,
+         top_k: int = 5,
+         node_type: Optional[str] = None,
+         file_filter: Optional[str] = None,
+     ) -> List[SearchResult]:
+         """Semantic search for code nodes.
+
+         Args:
+             query: Natural-language or code query.
+             top_k: Number of results.
+             node_type: Optional filter (``function``, ``class``, ``module``).
+             file_filter: Optional file-path SQL pattern,
+                 e.g. ``"src/%"`` to restrict to files under *src/*.
+
+         Returns:
+             List of :class:`SearchResult` sorted by relevance (highest first).
+         """
+         query_emb: List[float] = self.embedding_model.embed_text(query)
+
+         if self.use_vector_store:
+             return self._search_vector_store(
+                 query_emb, top_k, node_type, file_filter,
+             )
+         return self._search_brute_force(query_emb, top_k, node_type)
+
+     # ------------------------------------------------------------------
+     # LanceDB path (fast)
+     # ------------------------------------------------------------------
+
+     def _search_vector_store(
+         self,
+         query_emb: List[float],
+         top_k: int,
+         node_type: Optional[str],
+         file_filter: Optional[str],
+     ) -> List[SearchResult]:
+         assert self.store.vector_store is not None
+
+         # Build SQL WHERE clause for hybrid search. SQL string literals
+         # take single quotes; double quotes would be read as identifiers.
+         clauses: List[str] = []
+         if node_type:
+             clauses.append(f"node_type = '{node_type}'")
+         if file_filter:
+             clauses.append(f"file_path LIKE '{file_filter}'")
+         where_sql = " AND ".join(clauses) if clauses else None
+
+         raw_results = self.store.vector_store.hybrid_search(
+             query_embedding=query_emb,
+             n_results=top_k,
+             where_sql=where_sql,
+         )
+
+         results: List[SearchResult] = []
+         for row in raw_results:
+             distance = row.get("_distance", 0.0)
+             # LanceDB returns L2 distance by default; convert to a
+             # similarity score in [0, 1]. With cosine distance the
+             # relationship is score = 1 - distance (the embeddings are
+             # unit-normalised).
+             score = max(0.0, 1.0 - distance)
+
+             # Enrich from SQLite if full node data is available
+             node_row = self.store.get_node(row.get("id", ""))
+
+             if node_row is not None:
+                 results.append(SearchResult(
+                     node_id=node_row["node_id"],
+                     score=score,
+                     node_type=node_row["node_type"],
+                     qualname=node_row["qualname"],
+                     file_path=node_row["file_path"],
+                     start_line=node_row["start_line"],
+                     end_line=node_row["end_line"],
+                     snippet=node_row["code"],
+                 ))
+             else:
+                 # Use data straight from LanceDB
+                 results.append(SearchResult(
+                     node_id=row.get("id", ""),
+                     score=score,
+                     node_type=row.get("node_type", ""),
+                     qualname=row.get("qualname", ""),
+                     file_path=row.get("file_path", ""),
+                     start_line=0,
+                     end_line=0,
+                     snippet=row.get("document", ""),
+                 ))
+
+         return results
+
+     # ------------------------------------------------------------------
+     # Brute-force fallback
+     # ------------------------------------------------------------------
+
+     def _search_brute_force(
+         self,
+         query_emb: List[float],
+         top_k: int,
+         node_type: Optional[str],
+     ) -> List[SearchResult]:
+         results: List[SearchResult] = []
+         for row in self.store.get_nodes():
+             if node_type and row["node_type"] != node_type:
+                 continue
+             embedding = json.loads(row["embedding"] or "[]")
+             score = cosine_similarity(query_emb, embedding)
+             if score <= 0:
+                 continue
+             results.append(SearchResult(
+                 node_id=row["node_id"],
+                 score=score,
+                 node_type=row["node_type"],
+                 qualname=row["qualname"],
+                 file_path=row["file_path"],
+                 start_line=row["start_line"],
+                 end_line=row["end_line"],
+                 snippet=row["code"],
+             ))
+
+         return sorted(results, key=lambda r: r.score, reverse=True)[:top_k]
+
+     # ------------------------------------------------------------------
+     # Convenience
+     # ------------------------------------------------------------------
+
+     def retrieve_context(
+         self,
+         query: str,
+         top_k: int = 5,
+         node_type: Optional[str] = None,
+         file_filter: Optional[str] = None,
+     ) -> str:
+         """Return a formatted string of the top search results.
+
+         Useful for injecting relevant code context into LLM prompts.
+         """
+         matches = self.search(
+             query, top_k=top_k, node_type=node_type, file_filter=file_filter,
+         )
+         if not matches:
+             return "No relevant nodes found."
+
+         blocks: List[str] = []
+         for item in matches:
+             blocks.append(
+                 f"[{item.node_type}] {item.qualname} "
+                 f"({item.file_path}:{item.start_line})\n"
+                 f"Score: {item.score:.3f}\n"
+                 f"```python\n{item.snippet[:1200]}\n```"
+             )
+         return "\n\n".join(blocks)
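To close, a hedged sketch of the retriever end to end. `NeuralEmbedder`'s constructor arguments are not visible in this diff, so the bare call is an assumption, and the project path and query string are illustrative.

```python
from pathlib import Path

from codegraph_cli.embeddings import NeuralEmbedder
from codegraph_cli.rag import RAGRetriever
from codegraph_cli.storage import GraphStore

store = GraphStore(Path.home() / ".codegraph" / "projects" / "myproject")  # hypothetical path
retriever = RAGRetriever(store, NeuralEmbedder())  # constructor args assumed

# Hybrid LanceDB search when a vector store is attached, brute-force cosine otherwise.
for hit in retriever.search("where is file parsing done?", top_k=3, node_type="function"):
    print(f"{hit.score:.3f} {hit.qualname} ({hit.file_path}:{hit.start_line})")

# Prompt-ready context block for an LLM call:
print(retriever.retrieve_context("where is file parsing done?", top_k=3))
```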