cognify_code-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. ai_code_assistant/__init__.py +14 -0
  2. ai_code_assistant/agent/__init__.py +63 -0
  3. ai_code_assistant/agent/code_agent.py +461 -0
  4. ai_code_assistant/agent/code_generator.py +388 -0
  5. ai_code_assistant/agent/code_reviewer.py +365 -0
  6. ai_code_assistant/agent/diff_engine.py +308 -0
  7. ai_code_assistant/agent/file_manager.py +300 -0
  8. ai_code_assistant/agent/intent_classifier.py +284 -0
  9. ai_code_assistant/chat/__init__.py +11 -0
  10. ai_code_assistant/chat/agent_session.py +156 -0
  11. ai_code_assistant/chat/session.py +165 -0
  12. ai_code_assistant/cli.py +1571 -0
  13. ai_code_assistant/config.py +149 -0
  14. ai_code_assistant/editor/__init__.py +8 -0
  15. ai_code_assistant/editor/diff_handler.py +270 -0
  16. ai_code_assistant/editor/file_editor.py +350 -0
  17. ai_code_assistant/editor/prompts.py +146 -0
  18. ai_code_assistant/generator/__init__.py +7 -0
  19. ai_code_assistant/generator/code_gen.py +265 -0
  20. ai_code_assistant/generator/prompts.py +114 -0
  21. ai_code_assistant/git/__init__.py +6 -0
  22. ai_code_assistant/git/commit_generator.py +130 -0
  23. ai_code_assistant/git/manager.py +203 -0
  24. ai_code_assistant/llm.py +111 -0
  25. ai_code_assistant/providers/__init__.py +23 -0
  26. ai_code_assistant/providers/base.py +124 -0
  27. ai_code_assistant/providers/cerebras.py +97 -0
  28. ai_code_assistant/providers/factory.py +148 -0
  29. ai_code_assistant/providers/google.py +103 -0
  30. ai_code_assistant/providers/groq.py +111 -0
  31. ai_code_assistant/providers/ollama.py +86 -0
  32. ai_code_assistant/providers/openai.py +114 -0
  33. ai_code_assistant/providers/openrouter.py +130 -0
  34. ai_code_assistant/py.typed +0 -0
  35. ai_code_assistant/refactor/__init__.py +20 -0
  36. ai_code_assistant/refactor/analyzer.py +189 -0
  37. ai_code_assistant/refactor/change_plan.py +172 -0
  38. ai_code_assistant/refactor/multi_file_editor.py +346 -0
  39. ai_code_assistant/refactor/prompts.py +175 -0
  40. ai_code_assistant/retrieval/__init__.py +19 -0
  41. ai_code_assistant/retrieval/chunker.py +215 -0
  42. ai_code_assistant/retrieval/indexer.py +236 -0
  43. ai_code_assistant/retrieval/search.py +239 -0
  44. ai_code_assistant/reviewer/__init__.py +7 -0
  45. ai_code_assistant/reviewer/analyzer.py +278 -0
  46. ai_code_assistant/reviewer/prompts.py +113 -0
  47. ai_code_assistant/utils/__init__.py +18 -0
  48. ai_code_assistant/utils/file_handler.py +155 -0
  49. ai_code_assistant/utils/formatters.py +259 -0
  50. cognify_code-0.2.0.dist-info/METADATA +383 -0
  51. cognify_code-0.2.0.dist-info/RECORD +55 -0
  52. cognify_code-0.2.0.dist-info/WHEEL +5 -0
  53. cognify_code-0.2.0.dist-info/entry_points.txt +3 -0
  54. cognify_code-0.2.0.dist-info/licenses/LICENSE +22 -0
  55. cognify_code-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,215 @@
+ """
+ Code chunking strategies for semantic indexing.
+
+ Chunks code into meaningful segments for better retrieval.
+ """
+
+ import re
+ from dataclasses import dataclass, field
+ from typing import List, Optional
+ from pathlib import Path
+
+
+ @dataclass
+ class CodeChunk:
+     """Represents a chunk of code with metadata."""
+     content: str
+     file_path: str
+     start_line: int
+     end_line: int
+     chunk_type: str  # function, class, module, block
+     name: Optional[str] = None
+     language: Optional[str] = None
+
+     @property
+     def id(self) -> str:
+         """Generate unique ID for this chunk."""
+         return f"{self.file_path}:{self.start_line}-{self.end_line}"
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary for storage."""
+         return {
+             "content": self.content,
+             "file_path": self.file_path,
+             "start_line": self.start_line,
+             "end_line": self.end_line,
+             "chunk_type": self.chunk_type,
+             "name": self.name or "",
+             "language": self.language or "",
+         }
+
+
+ @dataclass
+ class CodeChunker:
+     """Splits code files into semantic chunks."""
+
+     chunk_size: int = 50  # Max lines per chunk
+     chunk_overlap: int = 10  # Overlap between chunks
+
+     # Language detection by extension
+     LANGUAGE_MAP: dict = field(default_factory=lambda: {
+         ".py": "python",
+         ".js": "javascript",
+         ".ts": "typescript",
+         ".jsx": "javascript",
+         ".tsx": "typescript",
+         ".java": "java",
+         ".go": "go",
+         ".rs": "rust",
+         ".rb": "ruby",
+         ".php": "php",
+         ".c": "c",
+         ".cpp": "cpp",
+         ".h": "c",
+         ".hpp": "cpp",
+         ".cs": "csharp",
+         ".swift": "swift",
+         ".kt": "kotlin",
+         ".scala": "scala",
+         ".sh": "bash",
+         ".yaml": "yaml",
+         ".yml": "yaml",
+         ".json": "json",
+         ".md": "markdown",
+         ".sql": "sql",
+     })
+
+     # Patterns for detecting code boundaries
+     PYTHON_PATTERNS = {
+         "class": r"^class\s+(\w+)",
+         "function": r"^(?:async\s+)?def\s+(\w+)",
+     }
+
+     def detect_language(self, file_path: str) -> Optional[str]:
+         """Detect programming language from file extension."""
+         ext = Path(file_path).suffix.lower()
+         return self.LANGUAGE_MAP.get(ext)
+
+     def chunk_file(self, file_path: str, content: str) -> List[CodeChunk]:
+         """Split a file into semantic chunks."""
+         language = self.detect_language(file_path)
+         lines = content.split("\n")
+
+         if language == "python":
+             return self._chunk_python(file_path, lines, language)
+         else:
+             return self._chunk_generic(file_path, lines, language)
+
+     def _chunk_python(self, file_path: str, lines: List[str], language: str) -> List[CodeChunk]:
+         """Chunk Python code by class/function boundaries."""
+         chunks = []
+         current_chunk_start = 0
+         current_chunk_lines = []
+         current_name = None
+         current_type = "module"
+
+         for i, line in enumerate(lines):
+             # Check for class definition
+             class_match = re.match(self.PYTHON_PATTERNS["class"], line)
+             func_match = re.match(self.PYTHON_PATTERNS["function"], line)
+
+             if class_match or func_match:
+                 # Save previous chunk if exists
+                 if current_chunk_lines:
+                     chunks.append(CodeChunk(
+                         content="\n".join(current_chunk_lines),
+                         file_path=file_path,
+                         start_line=current_chunk_start + 1,
+                         end_line=i,
+                         chunk_type=current_type,
+                         name=current_name,
+                         language=language,
+                     ))
+
+                 # Start new chunk
+                 current_chunk_start = i
+                 current_chunk_lines = [line]
+                 if class_match:
+                     current_name = class_match.group(1)
+                     current_type = "class"
+                 else:
+                     current_name = func_match.group(1)
+                     current_type = "function"
+             else:
+                 current_chunk_lines.append(line)
+
+             # Check if chunk is too large
+             if len(current_chunk_lines) >= self.chunk_size:
+                 chunks.append(CodeChunk(
+                     content="\n".join(current_chunk_lines),
+                     file_path=file_path,
+                     start_line=current_chunk_start + 1,
+                     end_line=i + 1,
+                     chunk_type=current_type,
+                     name=current_name,
+                     language=language,
+                 ))
+                 # Start new chunk with overlap
+                 overlap_start = max(0, len(current_chunk_lines) - self.chunk_overlap)
+                 current_chunk_lines = current_chunk_lines[overlap_start:]
+                 current_chunk_start = i - len(current_chunk_lines) + 1
+
+         # Add final chunk
+         if current_chunk_lines:
+             chunks.append(CodeChunk(
+                 content="\n".join(current_chunk_lines),
+                 file_path=file_path,
+                 start_line=current_chunk_start + 1,
+                 end_line=len(lines),
+                 chunk_type=current_type,
+                 name=current_name,
+                 language=language,
+             ))
+
+         return chunks
+
+     def _chunk_generic(self, file_path: str, lines: List[str], language: Optional[str]) -> List[CodeChunk]:
+         """Chunk code using sliding window approach."""
+         chunks = []
+         total_lines = len(lines)
+
+         if total_lines == 0:
+             return chunks
+
+         # If file is small enough, return as single chunk
+         if total_lines <= self.chunk_size:
+             chunks.append(CodeChunk(
+                 content="\n".join(lines),
+                 file_path=file_path,
+                 start_line=1,
+                 end_line=total_lines,
+                 chunk_type="module",
+                 name=Path(file_path).stem,
+                 language=language,
+             ))
+             return chunks
+
+         # Use sliding window
+         start = 0
+         while start < total_lines:
+             end = min(start + self.chunk_size, total_lines)
+             chunk_lines = lines[start:end]
+
+             chunks.append(CodeChunk(
+                 content="\n".join(chunk_lines),
+                 file_path=file_path,
+                 start_line=start + 1,
+                 end_line=end,
+                 chunk_type="block",
+                 name=f"{Path(file_path).stem}:{start+1}-{end}",
+                 language=language,
+             ))
+
+             # Move window with overlap
+             start += self.chunk_size - self.chunk_overlap
+
+             # Avoid tiny final chunks
+             if total_lines - start < self.chunk_overlap:
+                 break
+
+         return chunks
+
+     def chunk_text(self, text: str, file_path: str = "unknown") -> List[CodeChunk]:
+         """Convenience method to chunk text directly."""
+         return self.chunk_file(file_path, text)
+
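
For orientation, here is a minimal usage sketch of the chunker above. It is not part of the packaged files; the file name and source string are illustrative.

from ai_code_assistant.retrieval.chunker import CodeChunker

# Hypothetical input; any Python source is handled the same way.
source = '''def add(a, b):
    return a + b

class Calculator:
    def multiply(self, a, b):
        return a * b
'''

chunker = CodeChunker(chunk_size=50, chunk_overlap=10)
chunks = chunker.chunk_file("calc.py", source)
for chunk in chunks:
    # Each CodeChunk records its file path, 1-based line range, type, and name.
    print(chunk.id, chunk.chunk_type, chunk.name)

Because chunk_size and chunk_overlap are plain dataclass fields, they can be tuned per instance without subclassing.
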
@@ -0,0 +1,236 @@
+ """
+ Codebase indexer using ChromaDB and sentence-transformers.
+
+ Indexes code files for semantic search.
+ """
+
+ import os
+ import hashlib
+ from pathlib import Path
+ from typing import List, Optional, Set
+ from dataclasses import dataclass, field
+
+ import chromadb
+ from chromadb.config import Settings
+ from sentence_transformers import SentenceTransformer
+
+ from .chunker import CodeChunker, CodeChunk
+
+
+ @dataclass
+ class IndexStats:
+     """Statistics about the index."""
+     total_files: int = 0
+     total_chunks: int = 0
+     indexed_files: List[str] = field(default_factory=list)
+     skipped_files: List[str] = field(default_factory=list)
+     errors: List[str] = field(default_factory=list)
+
+
+ @dataclass
+ class IndexConfig:
+     """Configuration for the indexer."""
+     # Embedding model (runs locally)
+     embedding_model: str = "all-MiniLM-L6-v2"
+     # ChromaDB persistence directory
+     persist_directory: str = ".ai-assistant-index"
+     # Collection name
+     collection_name: str = "codebase"
+     # File extensions to index
+     extensions: Set[str] = field(default_factory=lambda: {
+         ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rs",
+         ".rb", ".php", ".c", ".cpp", ".h", ".hpp", ".cs", ".swift",
+         ".kt", ".scala", ".sh", ".yaml", ".yml", ".json", ".md", ".sql",
+         ".html", ".css", ".scss", ".vue", ".svelte",
+     })
+     # Directories to ignore
+     ignore_dirs: Set[str] = field(default_factory=lambda: {
+         ".git", ".svn", ".hg", "node_modules", "__pycache__", ".pytest_cache",
+         ".mypy_cache", ".tox", ".venv", "venv", "env", ".env", "dist", "build",
+         "target", ".idea", ".vscode", "coverage", ".coverage", "htmlcov",
+         ".eggs", "*.egg-info", ".ai-assistant-index",
+     })
+     # Max file size in bytes (skip large files)
+     max_file_size: int = 1024 * 1024  # 1MB
+
+
+ class CodebaseIndexer:
+     """Indexes codebase into ChromaDB for semantic search."""
+
+     def __init__(self, config: Optional[IndexConfig] = None, root_path: Optional[str] = None):
+         """Initialize the indexer.
+
+         Args:
+             config: Index configuration
+             root_path: Root directory of the codebase
+         """
+         self.config = config or IndexConfig()
+         self.root_path = Path(root_path) if root_path else Path.cwd()
+         self.chunker = CodeChunker()
+
+         # Initialize embedding model (lazy load)
+         self._embedder: Optional[SentenceTransformer] = None
+
+         # Initialize ChromaDB
+         persist_path = self.root_path / self.config.persist_directory
+         self._client = chromadb.PersistentClient(
+             path=str(persist_path),
+             settings=Settings(anonymized_telemetry=False),
+         )
+         self._collection = self._client.get_or_create_collection(
+             name=self.config.collection_name,
+             metadata={"description": "Codebase index for semantic search"},
+         )
+
+     @property
+     def embedder(self) -> SentenceTransformer:
+         """Lazy load the embedding model."""
+         if self._embedder is None:
+             self._embedder = SentenceTransformer(self.config.embedding_model)
+         return self._embedder
+
+     def _should_index_file(self, file_path: Path) -> bool:
+         """Check if a file should be indexed."""
+         # Check extension
+         if file_path.suffix.lower() not in self.config.extensions:
+             return False
+
+         # Check if in ignored directory
+         for part in file_path.parts:
+             if part in self.config.ignore_dirs:
+                 return False
+         # Handle glob patterns like *.egg-info
+         for pattern in self.config.ignore_dirs:
+             if "*" in pattern and file_path.match(pattern):
+                 return False
+
+         # Check file size
+         try:
+             if file_path.stat().st_size > self.config.max_file_size:
+                 return False
+         except OSError:
+             return False
+
+         return True
+
+     def _get_file_hash(self, content: str) -> str:
+         """Get hash of file content for change detection."""
+         return hashlib.md5(content.encode()).hexdigest()
+
+     def _read_file(self, file_path: Path) -> Optional[str]:
+         """Safely read a file."""
+         try:
+             return file_path.read_text(encoding="utf-8", errors="ignore")
+         except Exception:
+             return None
+
+     def index_file(self, file_path: Path, stats: IndexStats) -> bool:
+         """Index a single file.
+
+         Returns True if file was indexed, False if skipped.
+         """
+         content = self._read_file(file_path)
+         if content is None:
+             stats.errors.append(f"Could not read: {file_path}")
+             return False
+
+         # Get relative path for storage
+         try:
+             rel_path = str(file_path.relative_to(self.root_path))
+         except ValueError:
+             rel_path = str(file_path)
+
+         # Chunk the file
+         chunks = self.chunker.chunk_file(rel_path, content)
+         if not chunks:
+             stats.skipped_files.append(rel_path)
+             return False
+
+         # Generate embeddings
+         texts = [chunk.content for chunk in chunks]
+         embeddings = self.embedder.encode(texts, show_progress_bar=False).tolist()
+
+         # Prepare data for ChromaDB
+         ids = [chunk.id for chunk in chunks]
+         documents = texts
+         metadatas = [chunk.to_dict() for chunk in chunks]
+
+         # Delete old chunks for this file (if re-indexing)
+         try:
+             existing = self._collection.get(where={"file_path": rel_path})
+             if existing["ids"]:
+                 self._collection.delete(ids=existing["ids"])
+         except Exception:
+             pass  # Collection might be empty
+
+         # Add to collection
+         self._collection.add(
+             ids=ids,
+             documents=documents,
+             embeddings=embeddings,
+             metadatas=metadatas,
+         )
+
+         stats.indexed_files.append(rel_path)
+         stats.total_chunks += len(chunks)
+         return True
+
+     def index_directory(self, directory: Optional[Path] = None, verbose: bool = True) -> IndexStats:
+         """Index all files in a directory recursively.
+
+         Args:
+             directory: Directory to index (defaults to root_path)
+             verbose: Print progress
+
+         Returns:
+             IndexStats with results
+         """
+         directory = directory or self.root_path
+         stats = IndexStats()
+
+         # Find all files to index
+         files_to_index = []
+         for file_path in directory.rglob("*"):
+             if file_path.is_file() and self._should_index_file(file_path):
+                 files_to_index.append(file_path)
+
+         stats.total_files = len(files_to_index)
+
+         if verbose:
+             print(f"Found {len(files_to_index)} files to index...")
+
+         # Index each file
+         for i, file_path in enumerate(files_to_index):
+             if verbose and (i + 1) % 10 == 0:
+                 print(f"  Indexed {i + 1}/{len(files_to_index)} files...")
+
+             try:
+                 self.index_file(file_path, stats)
+             except Exception as e:
+                 stats.errors.append(f"{file_path}: {str(e)}")
+
+         if verbose:
+             print(f"\n✓ Indexed {len(stats.indexed_files)} files ({stats.total_chunks} chunks)")
+             if stats.errors:
+                 print(f"⚠ {len(stats.errors)} errors occurred")
+
+         return stats
+
+     def clear_index(self) -> None:
+         """Clear the entire index."""
+         self._client.delete_collection(self.config.collection_name)
+         self._collection = self._client.create_collection(
+             name=self.config.collection_name,
+             metadata={"description": "Codebase index for semantic search"},
+         )
+
+     def get_stats(self) -> dict:
+         """Get statistics about the current index."""
+         count = self._collection.count()
+         return {
+             "total_chunks": count,
+             "collection_name": self.config.collection_name,
+             "embedding_model": self.config.embedding_model,
+             "root_path": str(self.root_path),
+         }
+
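
A minimal sketch of driving the indexer above from a project root follows; it is not part of the packaged files, and the path and config values are illustrative (the defaults behave the same).

from ai_code_assistant.retrieval.indexer import CodebaseIndexer, IndexConfig

# Assumes the current directory is the codebase to index.
config = IndexConfig(persist_directory=".ai-assistant-index")  # default shown for clarity
indexer = CodebaseIndexer(config=config, root_path=".")

stats = indexer.index_directory(verbose=True)
print(f"Indexed {len(stats.indexed_files)} files into {stats.total_chunks} chunks")
if stats.errors:
    print("Errors:", stats.errors)
print(indexer.get_stats())

Re-running the same call is safe: index_file deletes any existing chunks for a file before re-adding them, so the collection does not accumulate stale entries.
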
@@ -0,0 +1,239 @@
+ """
+ Semantic search over indexed codebase.
+
+ Provides natural language search capabilities.
+ """
+
+ from dataclasses import dataclass, field
+ from typing import List, Optional
+ from pathlib import Path
+
+ import chromadb
+ from chromadb.config import Settings
+ from sentence_transformers import SentenceTransformer
+
+ from .indexer import IndexConfig
+
+
+ @dataclass
+ class SearchResult:
+     """A single search result."""
+     content: str
+     file_path: str
+     start_line: int
+     end_line: int
+     chunk_type: str
+     name: str
+     language: str
+     score: float  # Similarity score (0-1, higher is better)
+
+     def __str__(self) -> str:
+         """Format result for display."""
+         return (
+             f"📄 {self.file_path}:{self.start_line}-{self.end_line} "
+             f"({self.chunk_type}: {self.name}) [score: {self.score:.3f}]"
+         )
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary."""
+         return {
+             "content": self.content,
+             "file_path": self.file_path,
+             "start_line": self.start_line,
+             "end_line": self.end_line,
+             "chunk_type": self.chunk_type,
+             "name": self.name,
+             "language": self.language,
+             "score": self.score,
+         }
+
+
+ @dataclass
+ class SearchResponse:
+     """Response from a search query."""
+     query: str
+     results: List[SearchResult] = field(default_factory=list)
+     total_results: int = 0
+
+     @property
+     def has_results(self) -> bool:
+         return len(self.results) > 0
+
+     def format_for_llm(self, max_results: int = 5) -> str:
+         """Format results as context for LLM prompts."""
+         if not self.results:
+             return "No relevant code found."
+
+         parts = [f"Found {len(self.results)} relevant code sections:\n"]
+
+         for i, result in enumerate(self.results[:max_results], 1):
+             parts.append(f"\n--- Result {i}: {result.file_path}:{result.start_line}-{result.end_line} ---")
+             parts.append(f"Type: {result.chunk_type} | Name: {result.name}")
+             parts.append(f"```{result.language or ''}")
+             parts.append(result.content)
+             parts.append("```\n")
+
+         return "\n".join(parts)
+
+
+ class CodebaseSearch:
+     """Semantic search over indexed codebase."""
+
+     def __init__(self, config: Optional[IndexConfig] = None, root_path: Optional[str] = None):
+         """Initialize search.
+
+         Args:
+             config: Index configuration (must match indexer config)
+             root_path: Root directory of the codebase
+         """
+         self.config = config or IndexConfig()
+         self.root_path = Path(root_path) if root_path else Path.cwd()
+
+         # Initialize embedding model (lazy load)
+         self._embedder: Optional[SentenceTransformer] = None
+
+         # Connect to ChromaDB
+         persist_path = self.root_path / self.config.persist_directory
+         if not persist_path.exists():
+             raise FileNotFoundError(
+                 f"Index not found at {persist_path}. Run 'ai-assist index' first."
+             )
+
+         self._client = chromadb.PersistentClient(
+             path=str(persist_path),
+             settings=Settings(anonymized_telemetry=False),
+         )
+
+         try:
+             self._collection = self._client.get_collection(self.config.collection_name)
+         except Exception:
+             raise FileNotFoundError(
+                 f"Collection '{self.config.collection_name}' not found. Run 'ai-assist index' first."
+             )
+
+     @property
+     def embedder(self) -> SentenceTransformer:
+         """Lazy load the embedding model."""
+         if self._embedder is None:
+             self._embedder = SentenceTransformer(self.config.embedding_model)
+         return self._embedder
+
+     def search(
+         self,
+         query: str,
+         top_k: int = 10,
+         min_score: float = 0.0,
+         file_filter: Optional[str] = None,
+         language_filter: Optional[str] = None,
+     ) -> SearchResponse:
+         """Search for relevant code.
+
+         Args:
+             query: Natural language search query
+             top_k: Maximum number of results
+             min_score: Minimum similarity score (0-1)
+             file_filter: Filter by file path (substring match)
+             language_filter: Filter by programming language
+
+         Returns:
+             SearchResponse with results
+         """
+         # Generate query embedding
+         query_embedding = self.embedder.encode(query, show_progress_bar=False).tolist()
+
+         # Build where clause for filtering
+         where = None
+         if language_filter:
+             where = {"language": language_filter}
+
+         # Search ChromaDB
+         results = self._collection.query(
+             query_embeddings=[query_embedding],
+             n_results=top_k,
+             where=where,
+             include=["documents", "metadatas", "distances"],
+         )
+
+         # Process results
+         search_results = []
+
+         if results["documents"] and results["documents"][0]:
+             documents = results["documents"][0]
+             metadatas = results["metadatas"][0] if results["metadatas"] else [{}] * len(documents)
+             distances = results["distances"][0] if results["distances"] else [0] * len(documents)
+
+             for doc, meta, distance in zip(documents, metadatas, distances):
+                 # Convert distance to similarity score (ChromaDB returns L2 distance)
+                 # Lower distance = more similar, so we convert
+                 score = 1 / (1 + distance)
+
+                 # Apply minimum score filter
+                 if score < min_score:
+                     continue
+
+                 # Apply file filter
+                 file_path = meta.get("file_path", "")
+                 if file_filter and file_filter.lower() not in file_path.lower():
+                     continue
+
+                 search_results.append(SearchResult(
+                     content=doc,
+                     file_path=file_path,
+                     start_line=int(meta.get("start_line", 0)),
+                     end_line=int(meta.get("end_line", 0)),
+                     chunk_type=meta.get("chunk_type", "unknown"),
+                     name=meta.get("name", ""),
+                     language=meta.get("language", ""),
+                     score=score,
+                 ))
+
+         return SearchResponse(
+             query=query,
+             results=search_results,
+             total_results=len(search_results),
+         )
+
+     def search_similar(self, code: str, top_k: int = 5) -> SearchResponse:
+         """Find code similar to the given code snippet.
+
+         Args:
+             code: Code snippet to find similar code for
+             top_k: Maximum number of results
+
+         Returns:
+             SearchResponse with similar code
+         """
+         return self.search(query=code, top_k=top_k)
+
+     def get_file_context(self, file_path: str) -> List[SearchResult]:
+         """Get all indexed chunks for a specific file.
+
+         Args:
+             file_path: Path to the file
+
+         Returns:
+             List of SearchResult for the file
+         """
+         results = self._collection.get(
+             where={"file_path": file_path},
+             include=["documents", "metadatas"],
+         )
+
+         search_results = []
+         if results["documents"]:
+             for doc, meta in zip(results["documents"], results["metadatas"]):
+                 search_results.append(SearchResult(
+                     content=doc,
+                     file_path=meta.get("file_path", ""),
+                     start_line=int(meta.get("start_line", 0)),
+                     end_line=int(meta.get("end_line", 0)),
+                     chunk_type=meta.get("chunk_type", "unknown"),
+                     name=meta.get("name", ""),
+                     language=meta.get("language", ""),
+                     score=1.0,  # Not a similarity search
+                 ))
+
+         # Sort by line number
+         search_results.sort(key=lambda x: x.start_line)
+         return search_results
+
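
A minimal search sketch against an index built as above (not shipped with the package); the query string is illustrative, and CodebaseSearch raises FileNotFoundError if no index exists yet.

from ai_code_assistant.retrieval.search import CodebaseSearch

search = CodebaseSearch(root_path=".")  # must point at the same root used for indexing
response = search.search(
    "where are files split into chunks for embedding?",
    top_k=5,
    min_score=0.2,          # scores come from 1 / (1 + L2 distance)
    language_filter="python",
)

if response.has_results:
    for result in response.results:
        print(result)                  # file:lines (type: name) [score: ...]
    print(response.format_for_llm())   # ready-to-paste context for an LLM prompt
else:
    print("No matches above the score threshold.")

Note that file_filter and min_score are applied after ChromaDB returns its top_k candidates, so a tight filter can yield fewer than top_k results.
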
@@ -0,0 +1,7 @@
+ """Code review module for AI Code Assistant."""
+
+ from ai_code_assistant.reviewer.analyzer import CodeAnalyzer, ReviewResult, ReviewIssue
+ from ai_code_assistant.reviewer.prompts import REVIEW_PROMPTS
+
+ __all__ = ["CodeAnalyzer", "ReviewResult", "ReviewIssue", "REVIEW_PROMPTS"]
+