ai-coding-assistant 0.5.0 (ai_coding_assistant-0.5.0-py3-none-any.whl)
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- ai_coding_assistant-0.5.0.dist-info/METADATA +226 -0
- ai_coding_assistant-0.5.0.dist-info/RECORD +89 -0
- ai_coding_assistant-0.5.0.dist-info/WHEEL +4 -0
- ai_coding_assistant-0.5.0.dist-info/entry_points.txt +3 -0
- ai_coding_assistant-0.5.0.dist-info/licenses/LICENSE +21 -0
- coding_assistant/__init__.py +3 -0
- coding_assistant/__main__.py +19 -0
- coding_assistant/cli/__init__.py +1 -0
- coding_assistant/cli/app.py +158 -0
- coding_assistant/cli/commands/__init__.py +19 -0
- coding_assistant/cli/commands/ask.py +178 -0
- coding_assistant/cli/commands/config.py +438 -0
- coding_assistant/cli/commands/diagram.py +267 -0
- coding_assistant/cli/commands/document.py +410 -0
- coding_assistant/cli/commands/explain.py +192 -0
- coding_assistant/cli/commands/fix.py +249 -0
- coding_assistant/cli/commands/index.py +162 -0
- coding_assistant/cli/commands/refactor.py +245 -0
- coding_assistant/cli/commands/search.py +182 -0
- coding_assistant/cli/commands/serve_docs.py +128 -0
- coding_assistant/cli/repl.py +381 -0
- coding_assistant/cli/theme.py +90 -0
- coding_assistant/codebase/__init__.py +1 -0
- coding_assistant/codebase/crawler.py +93 -0
- coding_assistant/codebase/parser.py +266 -0
- coding_assistant/config/__init__.py +25 -0
- coding_assistant/config/config_manager.py +615 -0
- coding_assistant/config/settings.py +82 -0
- coding_assistant/context/__init__.py +19 -0
- coding_assistant/context/chunker.py +443 -0
- coding_assistant/context/enhanced_retriever.py +322 -0
- coding_assistant/context/hybrid_search.py +311 -0
- coding_assistant/context/ranker.py +355 -0
- coding_assistant/context/retriever.py +119 -0
- coding_assistant/context/window.py +362 -0
- coding_assistant/documentation/__init__.py +23 -0
- coding_assistant/documentation/agents/__init__.py +27 -0
- coding_assistant/documentation/agents/coordinator.py +510 -0
- coding_assistant/documentation/agents/module_documenter.py +111 -0
- coding_assistant/documentation/agents/synthesizer.py +139 -0
- coding_assistant/documentation/agents/task_delegator.py +100 -0
- coding_assistant/documentation/decomposition/__init__.py +21 -0
- coding_assistant/documentation/decomposition/context_preserver.py +477 -0
- coding_assistant/documentation/decomposition/module_detector.py +302 -0
- coding_assistant/documentation/decomposition/partitioner.py +621 -0
- coding_assistant/documentation/generators/__init__.py +14 -0
- coding_assistant/documentation/generators/dataflow_generator.py +440 -0
- coding_assistant/documentation/generators/diagram_generator.py +511 -0
- coding_assistant/documentation/graph/__init__.py +13 -0
- coding_assistant/documentation/graph/dependency_builder.py +468 -0
- coding_assistant/documentation/graph/module_analyzer.py +475 -0
- coding_assistant/documentation/writers/__init__.py +11 -0
- coding_assistant/documentation/writers/markdown_writer.py +322 -0
- coding_assistant/embeddings/__init__.py +0 -0
- coding_assistant/embeddings/generator.py +89 -0
- coding_assistant/embeddings/store.py +187 -0
- coding_assistant/exceptions/__init__.py +50 -0
- coding_assistant/exceptions/base.py +110 -0
- coding_assistant/exceptions/llm.py +249 -0
- coding_assistant/exceptions/recovery.py +263 -0
- coding_assistant/exceptions/storage.py +213 -0
- coding_assistant/exceptions/validation.py +230 -0
- coding_assistant/llm/__init__.py +1 -0
- coding_assistant/llm/client.py +277 -0
- coding_assistant/llm/gemini_client.py +181 -0
- coding_assistant/llm/groq_client.py +160 -0
- coding_assistant/llm/prompts.py +98 -0
- coding_assistant/llm/together_client.py +160 -0
- coding_assistant/operations/__init__.py +13 -0
- coding_assistant/operations/differ.py +369 -0
- coding_assistant/operations/generator.py +347 -0
- coding_assistant/operations/linter.py +430 -0
- coding_assistant/operations/validator.py +406 -0
- coding_assistant/storage/__init__.py +9 -0
- coding_assistant/storage/database.py +363 -0
- coding_assistant/storage/session.py +231 -0
- coding_assistant/utils/__init__.py +31 -0
- coding_assistant/utils/cache.py +477 -0
- coding_assistant/utils/hardware.py +132 -0
- coding_assistant/utils/keystore.py +206 -0
- coding_assistant/utils/logger.py +32 -0
- coding_assistant/utils/progress.py +311 -0
- coding_assistant/validation/__init__.py +13 -0
- coding_assistant/validation/files.py +305 -0
- coding_assistant/validation/inputs.py +335 -0
- coding_assistant/validation/params.py +280 -0
- coding_assistant/validation/sanitizers.py +243 -0
- coding_assistant/vcs/__init__.py +5 -0
- coding_assistant/vcs/git.py +269 -0
coding_assistant/documentation/writers/markdown_writer.py

@@ -0,0 +1,322 @@
"""Markdown documentation writer.

This module writes generated documentation and diagrams to markdown files
with proper structure, formatting, and navigation.
"""

from pathlib import Path
from typing import Dict, List
from datetime import datetime

from coding_assistant.documentation.decomposition.partitioner import Partition
from coding_assistant.utils.logger import get_logger

logger = get_logger(__name__)


class MarkdownWriter:
    """
    Write documentation to markdown files.

    Creates:
    - README.md (repository overview)
    - modules/ (per-module documentation)
    - diagrams/ (visual diagrams)
    - index.md (navigation)
    """

    def __init__(self, output_dir: Path):
        """
        Initialize markdown writer.

        Args:
            output_dir: Directory to write documentation
        """
        self.output_dir = Path(output_dir)
        self.modules_dir = self.output_dir / "modules"
        self.diagrams_dir = self.output_dir / "diagrams"

        # Create directories
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.modules_dir.mkdir(exist_ok=True)
        self.diagrams_dir.mkdir(exist_ok=True)

    def write_documentation(self,
                            overview: str,
                            module_docs: Dict[str, str],
                            diagrams: Dict[str, str],
                            partitions: List[Partition],
                            repo_metadata: Dict) -> List[Path]:
        """
        Write all documentation to files.

        Args:
            overview: Repository overview markdown
            module_docs: Module name -> documentation mapping
            diagrams: Diagram type -> mermaid code mapping
            partitions: List of partitions
            repo_metadata: Repository metadata

        Returns:
            List of paths to written files
        """
        logger.info(f"Writing documentation to {self.output_dir}")

        written_files = []

        # 1. Write README.md (overview)
        readme_path = self._write_readme(overview, diagrams, repo_metadata)
        written_files.append(readme_path)

        # 2. Write module documentation
        module_paths = self._write_modules(module_docs, partitions)
        written_files.extend(module_paths)

        # 3. Write diagrams
        diagram_paths = self._write_diagrams(diagrams)
        written_files.extend(diagram_paths)

        # 4. Write index/navigation
        index_path = self._write_index(module_docs, diagrams, repo_metadata)
        written_files.append(index_path)

        logger.info(f"Wrote {len(written_files)} documentation files")

        return written_files

    def _write_readme(self,
                      overview: str,
                      diagrams: Dict[str, str],
                      repo_metadata: Dict) -> Path:
        """Write README.md with repository overview."""

        readme_path = self.output_dir / "README.md"

        content = []

        # Header
        repo_name = repo_metadata.get('name', 'Repository')
        content.append(f"# {repo_name} Documentation")
        content.append("")
        content.append(f"*Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*")
        content.append("")

        # Overview
        content.append(overview)
        content.append("")

        # Architecture diagram (if available)
        if 'architecture' in diagrams:
            content.append("## Architecture")
            content.append("")
            content.append("```mermaid")
            content.append(diagrams['architecture'])
            content.append("```")
            content.append("")

        # Navigation
        content.append("## Documentation")
        content.append("")
        content.append("- [Module Index](index.md) - Browse all modules")
        content.append("- [Modules](modules/) - Detailed module documentation")
        content.append("- [Diagrams](diagrams/) - Visual diagrams")
        content.append("")

        # Repository stats
        content.append("## Repository Statistics")
        content.append("")
        content.append(f"- **Total Files**: {repo_metadata.get('total_files', 'N/A')}")
        content.append(f"- **Modules Detected**: {repo_metadata.get('total_modules', 'N/A')}")
        content.append(f"- **Partitions**: {repo_metadata.get('total_partitions', 'N/A')}")
        content.append("")

        # Footer
        content.append("---")
        content.append("")
        content.append("*This documentation was auto-generated using CodeWiki integration.*")
        content.append("")

        readme_path.write_text("\n".join(content))

        logger.debug(f"Wrote README.md ({len(content)} lines)")

        return readme_path

    def _write_modules(self,
                       module_docs: Dict[str, str],
                       partitions: List[Partition]) -> List[Path]:
        """Write individual module documentation files."""

        written_paths = []

        # Create partition lookup
        partition_map = {p.name: p for p in partitions}

        for module_name, doc_content in module_docs.items():
            # Sanitize module name for filename
            safe_name = self._sanitize_filename(module_name)
            module_path = self.modules_dir / f"{safe_name}.md"

            content = []

            # Header
            content.append(f"# {module_name}")
            content.append("")

            # Metadata (if partition available)
            partition = partition_map.get(module_name)
            if partition:
                content.append("## Module Information")
                content.append("")
                content.append(f"- **Files**: {len(partition.files)}")
                content.append(f"- **Lines of Code**: {partition.size_loc}")
                content.append(f"- **Cohesion Score**: {partition.cohesion_score:.2f}")
                content.append(f"- **Dependencies**: {len(partition.dependencies)}")
                content.append("")

                # File list
                if partition.files:
                    content.append("### Files in Module")
                    content.append("")
                    for file_path in partition.files[:20]:  # Limit to 20
                        rel_path = Path(file_path).name
                        content.append(f"- `{rel_path}`")

                    if len(partition.files) > 20:
                        content.append(f"- *... and {len(partition.files) - 20} more files*")

                    content.append("")

            # Main documentation
            content.append("## Documentation")
            content.append("")
            content.append(doc_content)
            content.append("")

            # Navigation
            content.append("---")
            content.append("")
            content.append("[← Back to Index](../index.md)")
            content.append("")

            module_path.write_text("\n".join(content))
            written_paths.append(module_path)

        logger.debug(f"Wrote {len(written_paths)} module documentation files")

        return written_paths

    def _write_diagrams(self, diagrams: Dict[str, str]) -> List[Path]:
        """Write diagram files in mermaid format."""

        written_paths = []

        for diagram_type, diagram_code in diagrams.items():
            diagram_path = self.diagrams_dir / f"{diagram_type}.mmd"

            diagram_path.write_text(diagram_code)
            written_paths.append(diagram_path)

            # Also create markdown file with embedded diagram
            md_path = self.diagrams_dir / f"{diagram_type}.md"

            content = []
            content.append(f"# {diagram_type.title()} Diagram")
            content.append("")
            content.append(f"*Generated diagram for {diagram_type}*")
            content.append("")
            content.append("```mermaid")
            content.append(diagram_code)
            content.append("```")
            content.append("")
            content.append("---")
            content.append("")
            content.append(f"[Download .mmd file]({diagram_type}.mmd) | [Back to Index](../index.md)")
            content.append("")

            md_path.write_text("\n".join(content))
            written_paths.append(md_path)

        logger.debug(f"Wrote {len(written_paths)} diagram files")

        return written_paths

    def _write_index(self,
                     module_docs: Dict[str, str],
                     diagrams: Dict[str, str],
                     repo_metadata: Dict) -> Path:
        """Write index.md with navigation."""

        index_path = self.output_dir / "index.md"

        content = []

        # Header
        content.append("# Documentation Index")
        content.append("")
        content.append(f"*{repo_metadata.get('name', 'Repository')} Documentation*")
        content.append("")

        # Modules section
        content.append("## Modules")
        content.append("")
        content.append(f"This repository contains {len(module_docs)} documented modules:")
        content.append("")

        # Sort modules alphabetically
        sorted_modules = sorted(module_docs.keys())

        for module_name in sorted_modules:
            safe_name = self._sanitize_filename(module_name)
            content.append(f"- [{module_name}](modules/{safe_name}.md)")

        content.append("")

        # Diagrams section
        if diagrams:
            content.append("## Diagrams")
            content.append("")
            content.append("Visual representations of the codebase:")
            content.append("")

            for diagram_type in sorted(diagrams.keys()):
                content.append(f"- [{diagram_type.title()}](diagrams/{diagram_type}.md)")

            content.append("")

        # Quick links
        content.append("## Quick Links")
        content.append("")
        content.append("- [Repository Overview](README.md)")
        content.append("")

        # Footer
        content.append("---")
        content.append("")
        content.append(f"*Total: {len(module_docs)} modules, {len(diagrams)} diagrams*")
        content.append("")

        index_path.write_text("\n".join(content))

        logger.debug("Wrote index.md")

        return index_path

    def _sanitize_filename(self, name: str) -> str:
        """
        Sanitize module name for use as filename.

        Replaces special characters with underscores.
        """
        import re

        # Replace path separators, dots, spaces with underscores
        sanitized = re.sub(r'[/\\.\s]', '_', name)

        # Remove any remaining special characters
        sanitized = re.sub(r'[^a-zA-Z0-9_-]', '', sanitized)

        # Remove leading/trailing underscores
        sanitized = sanitized.strip('_')

        return sanitized or 'unnamed_module'
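For orientation, here is a minimal usage sketch of the class above. The module names, overview text, mermaid snippet, and metadata values are made-up placeholders, not part of the package; passing an empty partitions list simply makes the module pages skip their "Module Information" section.

from pathlib import Path
from coding_assistant.documentation.writers.markdown_writer import MarkdownWriter

# Hypothetical inputs; in the real pipeline these come from the
# documentation agents and the partitioner.
writer = MarkdownWriter(output_dir=Path("docs_out"))
files = writer.write_documentation(
    overview="# Demo\n\nA toy repository overview.",
    module_docs={"core": "Handles the main loop.", "utils": "Shared helpers."},
    diagrams={"architecture": "graph TD\n  core --> utils"},
    partitions=[],  # no Partition metadata available in this sketch
    repo_metadata={"name": "demo", "total_files": 2},
)
print([p.name for p in files])
# README.md, core.md, utils.md, architecture.mmd, architecture.md, index.md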
coding_assistant/embeddings/__init__.py (+0 -0, empty file, no content to show)
coding_assistant/embeddings/generator.py

@@ -0,0 +1,89 @@
"""Embedding generation using sentence-transformers (local, no API needed)."""
from typing import List, Dict
import numpy as np
from sentence_transformers import SentenceTransformer
from rich.console import Console

console = Console()


class EmbeddingGenerator:
    """Generate embeddings for code using local sentence-transformers."""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """
        Initialize the embedding generator.

        Args:
            model_name: Model to use. Default is all-MiniLM-L6-v2
                (fast, 384 dimensions, good for code)
        """
        console.print(f"[dim]Loading embedding model: [bold #06B6D4]{model_name}[/bold #06B6D4]...[/dim]")
        self.model = SentenceTransformer(model_name)
        self.dimension = self.model.get_sentence_embedding_dimension()
        console.print(f"[bold #10B981]✓[/bold #10B981] [dim]Model loaded (dimension: {self.dimension})[/dim]")

    def generate_embedding(self, text: str) -> List[float]:
        """
        Generate embedding for a single text.

        Args:
            text: Text to embed

        Returns:
            Embedding vector as list of floats
        """
        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()

    def generate_embeddings_batch(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for multiple texts (more efficient).

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors
        """
        embeddings = self.model.encode(
            texts,
            convert_to_numpy=True,
            show_progress_bar=len(texts) > 10
        )
        return embeddings.tolist()

    def embed_code_chunks(self, chunks: List[Dict]) -> List[Dict]:
        """
        Embed code chunks with metadata.

        Args:
            chunks: List of code chunks from parser

        Returns:
            Chunks with embeddings added
        """
        if not chunks:
            return []

        # Extract texts to embed
        texts = []
        for chunk in chunks:
            # Combine type, name (if exists), and content for embedding
            text_parts = [chunk['type']]

            if 'name' in chunk:
                text_parts.append(chunk['name'])

            text_parts.append(chunk['content'])

            texts.append(' '.join(text_parts))

        # Generate embeddings in batch
        embeddings = self.generate_embeddings_batch(texts)

        # Add embeddings to chunks
        for chunk, embedding in zip(chunks, embeddings):
            chunk['embedding'] = embedding

        return chunks
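A minimal sketch of how embed_code_chunks is shaped to be called. The chunk dicts below are hand-written stand-ins for the parser's output (only 'type' and 'content' are required, 'name' is optional), and the first run downloads all-MiniLM-L6-v2, which needs network access.

from coding_assistant.embeddings.generator import EmbeddingGenerator

gen = EmbeddingGenerator()  # loads all-MiniLM-L6-v2 locally

chunks = [
    {"type": "function", "name": "add", "content": "def add(a, b):\n    return a + b"},
    {"type": "comment", "content": "# TODO: handle overflow"},
]
chunks = gen.embed_code_chunks(chunks)
print(len(chunks[0]["embedding"]))  # 384 dimensions for all-MiniLM-L6-v2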
coding_assistant/embeddings/store.py

@@ -0,0 +1,187 @@
"""Vector store using ChromaDB for semantic code search."""
from pathlib import Path
from typing import List, Dict, Optional
import chromadb
from chromadb.config import Settings
import hashlib
import warnings
import sys
import os


class VectorStore:
    """Manages code embeddings in ChromaDB."""

    def __init__(self, persist_dir: Optional[Path] = None):
        """
        Initialize ChromaDB vector store.

        Args:
            persist_dir: Directory to persist the database
        """
        if persist_dir is None:
            persist_dir = Path.home() / ".coding_assistant" / "chroma_db"

        persist_dir.mkdir(parents=True, exist_ok=True)

        # Suppress ChromaDB telemetry warnings during initialization
        # Save original stderr
        original_stderr = sys.stderr
        try:
            # Redirect stderr to devnull during initialization
            sys.stderr = open(os.devnull, 'w')

            self.client = chromadb.PersistentClient(
                path=str(persist_dir),
                settings=Settings(
                    anonymized_telemetry=False,
                    allow_reset=True
                )
            )

            # Get or create collection
            self.collection = self.client.get_or_create_collection(
                name="codebase",
                metadata={"hnsw:space": "cosine"}  # Use cosine similarity
            )
        finally:
            # Restore original stderr
            sys.stderr.close()
            sys.stderr = original_stderr

        from rich.console import Console
        console = Console()
        console.print(f"[bold #10B981]✓[/bold #10B981] [dim]ChromaDB initialized ({self.collection.count()} chunks)[/dim]")

    def add_chunks(self, chunks: List[Dict]):
        """
        Add code chunks with embeddings to the store.

        Args:
            chunks: List of chunks with embeddings
        """
        if not chunks:
            return

        ids = []
        embeddings = []
        documents = []
        metadatas = []

        for chunk in chunks:
            # Create unique ID from file path + chunk type + position
            chunk_id = self._generate_chunk_id(chunk)
            ids.append(chunk_id)

            embeddings.append(chunk['embedding'])
            documents.append(chunk['content'])

            # Store metadata (without embedding to save space)
            metadata = {
                'file_path': chunk['file_path'],
                'type': chunk['type'],
                'start_line': chunk['start_line'],
                'end_line': chunk['end_line']
            }

            if 'name' in chunk:
                metadata['name'] = chunk['name']

            metadatas.append(metadata)

        # Add to ChromaDB
        self.collection.add(
            ids=ids,
            embeddings=embeddings,
            documents=documents,
            metadatas=metadatas
        )

        print(f"✓ Added {len(chunks)} chunks to vector store")

    def search(
        self,
        query_embedding: List[float],
        n_results: int = 10,
        filter_metadata: Optional[Dict] = None
    ) -> List[Dict]:
        """
        Search for similar code chunks.

        Args:
            query_embedding: Query embedding vector
            n_results: Number of results to return
            filter_metadata: Optional metadata filters

        Returns:
            List of matching chunks with similarity scores
        """
        # Suppress ChromaDB warnings during query
        original_stderr = sys.stderr
        try:
            sys.stderr = open(os.devnull, 'w')

            results = self.collection.query(
                query_embeddings=[query_embedding],
                n_results=n_results,
                where=filter_metadata,
                include=['documents', 'metadatas', 'distances']
            )
        finally:
            sys.stderr.close()
            sys.stderr = original_stderr

        return self._format_results(results)

    def delete_by_file(self, file_path: str):
        """
        Delete all chunks from a specific file.

        Args:
            file_path: Path to the file
        """
        # Get all IDs for this file
        results = self.collection.get(
            where={"file_path": file_path}
        )

        if results['ids']:
            self.collection.delete(ids=results['ids'])
            print(f"✓ Deleted {len(results['ids'])} chunks from {file_path}")

    def clear(self):
        """Clear all chunks from the store."""
        self.client.delete_collection("codebase")
        self.collection = self.client.create_collection(
            name="codebase",
            metadata={"hnsw:space": "cosine"}
        )
        print("✓ Vector store cleared")

    def count(self) -> int:
        """Get total number of chunks in the store."""
        return self.collection.count()

    def _generate_chunk_id(self, chunk: Dict) -> str:
        """Generate unique ID for a chunk."""
        # Use file path + type + start line
        id_string = f"{chunk['file_path']}:{chunk['type']}:{chunk['start_line']}"
        return hashlib.md5(id_string.encode()).hexdigest()

    def _format_results(self, raw_results: Dict) -> List[Dict]:
        """Format ChromaDB results into usable structure."""
        formatted = []

        if not raw_results['ids'] or not raw_results['ids'][0]:
            return formatted

        for i in range(len(raw_results['ids'][0])):
            formatted.append({
                'id': raw_results['ids'][0][i],
                'content': raw_results['documents'][0][i],
                'metadata': raw_results['metadatas'][0][i],
                'distance': raw_results['distances'][0][i],
                'similarity': 1 - raw_results['distances'][0][i]  # Convert distance to similarity
            })

        return formatted
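Putting the two embeddings modules together, here is a minimal index-and-search sketch. The persist directory, file path, and chunk fields are illustrative; note that add_chunks expects file_path, start_line, and end_line in each chunk for the stored metadata, on top of what embed_code_chunks needs.

from pathlib import Path
from coding_assistant.embeddings.generator import EmbeddingGenerator
from coding_assistant.embeddings.store import VectorStore

gen = EmbeddingGenerator()
store = VectorStore(persist_dir=Path("/tmp/demo_chroma"))  # hypothetical location

chunks = [{
    "type": "function",
    "name": "add",
    "content": "def add(a, b):\n    return a + b",
    "file_path": "demo/math.py",
    "start_line": 1,
    "end_line": 2,
}]
store.add_chunks(gen.embed_code_chunks(chunks))

# Semantic search: embed the query with the same model, then query ChromaDB.
query = gen.generate_embedding("function that sums two numbers")
for hit in store.search(query, n_results=3):
    print(f"{hit['similarity']:.2f}", hit["metadata"]["file_path"])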
coding_assistant/exceptions/__init__.py

@@ -0,0 +1,50 @@
"""Custom exceptions for the AI Coding Assistant."""

from coding_assistant.exceptions.base import AssistantError
from coding_assistant.exceptions.llm import (
    LLMError,
    LLMConnectionError,
    LLMResponseError,
    LLMTimeoutError
)
from coding_assistant.exceptions.storage import (
    StorageError,
    IndexError,
    IndexNotFoundError,
    SessionError
)
from coding_assistant.exceptions.validation import (
    ValidationError,
    FileNotFoundError,
    InvalidQueryError,
    InvalidParameterError,
    ConfigurationError
)
from coding_assistant.exceptions.recovery import ErrorRecovery

__all__ = [
    # Base
    'AssistantError',

    # LLM
    'LLMError',
    'LLMConnectionError',
    'LLMResponseError',
    'LLMTimeoutError',

    # Storage
    'StorageError',
    'IndexError',
    'IndexNotFoundError',
    'SessionError',

    # Validation
    'ValidationError',
    'FileNotFoundError',
    'InvalidQueryError',
    'InvalidParameterError',
    'ConfigurationError',

    # Recovery
    'ErrorRecovery',
]